diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 135aaf13c..493c26d3a 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -565,7 +565,7 @@ class OpenAIServingChat(OpenAIServing):
)
tool_parsers: list[ToolParser | None] = [
- self.tool_parser(tokenizer)
+ self.tool_parser(tokenizer, request.tools)
] * num_choices
else:
tool_parsers = [None] * num_choices
@@ -1331,7 +1331,7 @@ class OpenAIServingChat(OpenAIServing):
"Tokenizer not available when `skip_tokenizer_init=True`"
)
- tool_parser = self.tool_parser(tokenizer)
+ tool_parser = self.tool_parser(tokenizer, request.tools)
# NOTE: We use token_ids for openai tool parser
tool_call_info = tool_parser.extract_tool_calls(
"",
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index d8df1d3c4..eb7e00344 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -925,7 +925,7 @@ class OpenAIServing:
# Automatic Tool Call Parsing
try:
- tool_parser = tool_parser_cls(tokenizer)
+ tool_parser = tool_parser_cls(tokenizer, request.tools)
except RuntimeError as e:
logger.exception("Error in tool parser creation.")
raise e
diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py
index e3d7c588a..a31f20501 100644
--- a/vllm/entrypoints/openai/parser/responses_parser.py
+++ b/vllm/entrypoints/openai/parser/responses_parser.py
@@ -52,7 +52,7 @@ class ResponsesParser:
self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
self.tool_parser_instance = None
if tool_parser_cls is not None:
- self.tool_parser_instance = tool_parser_cls(tokenizer)
+ self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)
# Store the last finish_reason to determine response status
self.finish_reason: str | None = None
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index e71a62461..df94848e3 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -1344,7 +1344,7 @@ class OpenAIServingResponses(OpenAIServing):
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
tool_parser = None
if self.parser and self.parser.tool_parser_cls:
- tool_parser = self.parser.tool_parser_cls(tokenizer)
+ tool_parser = self.parser.tool_parser_cls(tokenizer, request.tools)
reasoning_ended = False
tool_call_text_started = False
previous_text = ""
diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py
index 6009b666d..52f03447d 100644
--- a/vllm/entrypoints/serve/render/serving.py
+++ b/vllm/entrypoints/serve/render/serving.py
@@ -545,6 +545,8 @@ class OpenAIServingRender:
)
raise NotImplementedError(msg)
tokenizer = renderer.get_tokenizer()
- request = tool_parser(tokenizer).adjust_request(request=request) # type: ignore[arg-type]
+ request = tool_parser(tokenizer, request.tools).adjust_request(
+ request=request # type: ignore[arg-type]
+ )
return conversation, [engine_input]
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index a2c2f0627..dcfe45d38 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -5,13 +5,18 @@ import importlib
import os
from collections.abc import Callable, Sequence
from functools import cached_property
+from typing import TypeAlias
from openai.types.responses import (
ResponseFormatTextJSONSchemaConfig,
ResponseTextConfig,
)
+from openai.types.responses.tool import Tool as ResponsesTool
-from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.chat_completion.protocol import (
+ ChatCompletionRequest,
+ ChatCompletionToolsParam,
+)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
@@ -30,6 +35,8 @@ from vllm.utils.import_utils import import_from_path
logger = init_logger(__name__)
+Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool
+
class ToolParser:
"""
@@ -38,7 +45,11 @@ class ToolParser:
derived classes.
"""
- def __init__(self, tokenizer: TokenizerLike):
+ def __init__(
+ self,
+ tokenizer: TokenizerLike,
+ tools: list[Tool] | None = None,
+ ):
self.prev_tool_call_arr: list[dict] = []
# the index of the tool call that is currently being parsed
self.current_tool_id: int = -1
@@ -46,6 +57,7 @@ class ToolParser:
self.streamed_args_for_tool: list[str] = []
self.model_tokenizer = tokenizer
+ self.tools = tools
@cached_property
def vocab(self) -> dict[str, int]:
diff --git a/vllm/tool_parsers/deepseekv31_tool_parser.py b/vllm/tool_parsers/deepseekv31_tool_parser.py
index ad42bb771..e4ade3aae 100644
--- a/vllm/tool_parsers/deepseekv31_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv31_tool_parser.py
@@ -19,14 +19,14 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
class DeepSeekV31ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index cb39a16fd..e86929944 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -43,8 +44,8 @@ class DeepSeekV32ToolParser(ToolParser):
|DSML|function_calls>
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/deepseekv3_tool_parser.py b/vllm/tool_parsers/deepseekv3_tool_parser.py
index 83bba1c87..e92af87e6 100644
--- a/vllm/tool_parsers/deepseekv3_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv3_tool_parser.py
@@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,8 +28,8 @@ logger = init_logger(__name__)
class DeepSeekV3ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/ernie45_tool_parser.py b/vllm/tool_parsers/ernie45_tool_parser.py
index d5dc7a3da..9722dddf7 100644
--- a/vllm/tool_parsers/ernie45_tool_parser.py
+++ b/vllm/tool_parsers/ernie45_tool_parser.py
@@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,12 +28,12 @@ logger = init_logger(__name__)
class Ernie45ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
"""
Ernie thinking model format:
abc\n\n\n\n\ndef\n\n
"""
- super().__init__(tokenizer)
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent = False
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id = -1
diff --git a/vllm/tool_parsers/functiongemma_tool_parser.py b/vllm/tool_parsers/functiongemma_tool_parser.py
index 599019b1b..dfd91d974 100644
--- a/vllm/tool_parsers/functiongemma_tool_parser.py
+++ b/vllm/tool_parsers/functiongemma_tool_parser.py
@@ -20,7 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -33,8 +33,8 @@ class FunctionGemmaToolParser(ToolParser):
call:func_name{param:value}
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Streaming state
self.current_tool_name_sent: bool = False
diff --git a/vllm/tool_parsers/gigachat3_tool_parser.py b/vllm/tool_parsers/gigachat3_tool_parser.py
index 90928f9ae..f470f6a5b 100644
--- a/vllm/tool_parsers/gigachat3_tool_parser.py
+++ b/vllm/tool_parsers/gigachat3_tool_parser.py
@@ -20,7 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -46,8 +46,8 @@ ARGS_REGEX = re.compile(
class GigaChat3ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.tool_started: bool = False
self.tool_name_sent: bool = False
self.tool_id: str | None = None
diff --git a/vllm/tool_parsers/glm47_moe_tool_parser.py b/vllm/tool_parsers/glm47_moe_tool_parser.py
index 8c72342d7..765d6d37d 100644
--- a/vllm/tool_parsers/glm47_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm47_moe_tool_parser.py
@@ -16,14 +16,15 @@ import regex as re
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool
from vllm.tool_parsers.glm4_moe_tool_parser import Glm4MoeModelToolParser
logger = init_logger(__name__)
class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# GLM-4.7 format: func_name[...]*
# The function name can be followed by a newline, whitespace, or
# directly by tags (no separator). The arg section is
diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py
index 28d86b68b..fc718921d 100644
--- a/vllm/tool_parsers/glm4_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm4_moe_tool_parser.py
@@ -21,7 +21,6 @@ import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -34,6 +33,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -48,8 +48,8 @@ class Glm4MoeModelToolParser(ToolParser):
rather than waiting for the complete tag.
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Stateful streaming fields
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict[str, Any]] = []
@@ -122,7 +122,7 @@ class Glm4MoeModelToolParser(ToolParser):
def _is_string_type(
tool_name: str,
arg_name: str,
- tools: list[ChatCompletionToolsParam] | None,
+ tools: list[Tool] | None,
) -> bool:
if tools is None:
return False
diff --git a/vllm/tool_parsers/granite4_tool_parser.py b/vllm/tool_parsers/granite4_tool_parser.py
index 693c4dc8f..3d58690f5 100644
--- a/vllm/tool_parsers/granite4_tool_parser.py
+++ b/vllm/tool_parsers/granite4_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -43,8 +44,8 @@ FuncT = TypeVar("FuncT", bound=_FunctionCallCtor)
class Granite4ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id: int = -1
diff --git a/vllm/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/tool_parsers/granite_20b_fc_tool_parser.py
index 7fe3c39f7..6d217a030 100644
--- a/vllm/tool_parsers/granite_20b_fc_tool_parser.py
+++ b/vllm/tool_parsers/granite_20b_fc_tool_parser.py
@@ -24,6 +24,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -46,8 +47,8 @@ class Granite20bFCToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.bot_token = "<function_call>"
self.tool_start_token = self.bot_token
diff --git a/vllm/tool_parsers/granite_tool_parser.py b/vllm/tool_parsers/granite_tool_parser.py
index 7cad01e16..d586db326 100644
--- a/vllm/tool_parsers/granite_tool_parser.py
+++ b/vllm/tool_parsers/granite_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -44,8 +45,8 @@ class GraniteToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# for granite 3.0, the token `<|tool_call|>`
self.bot_token = "<|tool_call|>"
# for granite 3.1, the string `<tool_call>`
diff --git a/vllm/tool_parsers/hermes_tool_parser.py b/vllm/tool_parsers/hermes_tool_parser.py
index 5bde5b2c0..cca2bf9a0 100644
--- a/vllm/tool_parsers/hermes_tool_parser.py
+++ b/vllm/tool_parsers/hermes_tool_parser.py
@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils.mistral import is_mistral_tokenizer
@@ -31,8 +32,8 @@ logger = init_logger(__name__)
class Hermes2ProToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if is_mistral_tokenizer(tokenizer):
logger.error("Detected Mistral tokenizer when using a Hermes model")
diff --git a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
index 4f446bfcc..29b2a5eae 100644
--- a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
+++ b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import consume_space
@@ -31,8 +32,8 @@ logger = init_logger(__name__)
class HunyuanA13BToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Initialize state for streaming mode
self.prev_tool_calls: list[dict] = []
diff --git a/vllm/tool_parsers/internlm2_tool_parser.py b/vllm/tool_parsers/internlm2_tool_parser.py
index 3b858f34c..fc7c44cff 100644
--- a/vllm/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/tool_parsers/internlm2_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import extract_intermediate_diff
@@ -30,8 +31,8 @@ logger = init_logger(__name__)
class Internlm2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.position = 0
def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
diff --git a/vllm/tool_parsers/jamba_tool_parser.py b/vllm/tool_parsers/jamba_tool_parser.py
index 98293a4c1..5a9af9910 100644
--- a/vllm/tool_parsers/jamba_tool_parser.py
+++ b/vllm/tool_parsers/jamba_tool_parser.py
@@ -22,7 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
from vllm.tool_parsers.utils import extract_intermediate_diff
from vllm.utils.mistral import is_mistral_tokenizer
@@ -30,8 +30,8 @@ logger = init_logger(__name__)
class JambaToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if is_mistral_tokenizer(self.model_tokenizer):
raise ValueError(
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index ed4795215..bc995319e 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,8 +28,8 @@ logger = init_logger(__name__)
class KimiK2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id: int = -1
diff --git a/vllm/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/tool_parsers/llama4_pythonic_tool_parser.py
index 93807196d..1921187e9 100644
--- a/vllm/tool_parsers/llama4_pythonic_tool_parser.py
+++ b/vllm/tool_parsers/llama4_pythonic_tool_parser.py
@@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -47,8 +48,12 @@ class Llama4PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/llama_tool_parser.py b/vllm/tool_parsers/llama_tool_parser.py
index 527d3f735..be3d47acd 100644
--- a/vllm/tool_parsers/llama_tool_parser.py
+++ b/vllm/tool_parsers/llama_tool_parser.py
@@ -24,6 +24,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -44,8 +45,12 @@ class Llama3JsonToolParser(ToolParser):
llama4_json are set.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# initialize properties used for state when parsing tool calls in
# streaming mode
diff --git a/vllm/tool_parsers/longcat_tool_parser.py b/vllm/tool_parsers/longcat_tool_parser.py
index 72f13559a..0304f452e 100644
--- a/vllm/tool_parsers/longcat_tool_parser.py
+++ b/vllm/tool_parsers/longcat_tool_parser.py
@@ -4,12 +4,13 @@
import regex as re
from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool
from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
class LongcatFlashToolParser(Hermes2ProToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.tool_call_start_token: str = "<longcat_tool_call>"
self.tool_call_end_token: str = "</longcat_tool_call>"
diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py
index a9291adc1..6c75e0099 100644
--- a/vllm/tool_parsers/minimax_m2_tool_parser.py
+++ b/vllm/tool_parsers/minimax_m2_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -29,8 +30,8 @@ logger = init_logger(__name__)
class MinimaxM2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/minimax_tool_parser.py b/vllm/tool_parsers/minimax_tool_parser.py
index cb5610fc7..2a2baa03b 100644
--- a/vllm/tool_parsers/minimax_tool_parser.py
+++ b/vllm/tool_parsers/minimax_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import extract_intermediate_diff
@@ -30,8 +31,8 @@ logger = init_logger(__name__)
class MinimaxToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Initialize streaming state for tracking tool call progress
self.streaming_state: dict[str, Any] = {
diff --git a/vllm/tool_parsers/mistral_tool_parser.py b/vllm/tool_parsers/mistral_tool_parser.py
index 56ba245ce..153c6ed32 100644
--- a/vllm/tool_parsers/mistral_tool_parser.py
+++ b/vllm/tool_parsers/mistral_tool_parser.py
@@ -26,6 +26,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils.mistral import is_mistral_tokenizer
@@ -78,8 +79,8 @@ class MistralToolParser(ToolParser):
Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if not is_mistral_tokenizer(self.model_tokenizer):
logger.info("Non-Mistral tokenizer detected when using a Mistral model...")
diff --git a/vllm/tool_parsers/olmo3_tool_parser.py b/vllm/tool_parsers/olmo3_tool_parser.py
index dd63b1086..dcbf0a673 100644
--- a/vllm/tool_parsers/olmo3_tool_parser.py
+++ b/vllm/tool_parsers/olmo3_tool_parser.py
@@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -51,8 +52,12 @@ class Olmo3PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 76f7a49df..ee6dd7071 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -16,6 +16,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -28,8 +29,8 @@ logger = init_logger(__name__)
class OpenAIToolParser(ToolParser):
- def __init__(self, tokenizer: "TokenizerLike"):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: "TokenizerLike", tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
def extract_tool_calls(
self,
diff --git a/vllm/tool_parsers/phi4mini_tool_parser.py b/vllm/tool_parsers/phi4mini_tool_parser.py
index f222cffd6..2dc262bba 100644
--- a/vllm/tool_parsers/phi4mini_tool_parser.py
+++ b/vllm/tool_parsers/phi4mini_tool_parser.py
@@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -35,8 +36,12 @@ class Phi4MiniJsonToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ) -> None:
+ super().__init__(tokenizer, tools)
# initialize properties used for state when parsing tool calls in
# streaming mode
diff --git a/vllm/tool_parsers/pythonic_tool_parser.py b/vllm/tool_parsers/pythonic_tool_parser.py
index 9c9f3e183..540a65024 100644
--- a/vllm/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/tool_parsers/pythonic_tool_parser.py
@@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -49,8 +50,12 @@ class PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 216ae163b..f9b406b53 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -10,7 +10,6 @@ import regex as re
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -23,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -30,8 +30,8 @@ logger = init_logger(__name__)
class Qwen3CoderToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
@@ -109,9 +109,7 @@ class Qwen3CoderToolParser(ToolParser):
self.accumulated_params = {}
self.streaming_request = None
- def _get_arguments_config(
- self, func_name: str, tools: list[ChatCompletionToolsParam] | None
- ) -> dict:
+ def _get_arguments_config(self, func_name: str, tools: list[Tool] | None) -> dict:
"""Extract argument configuration for a function."""
if tools is None:
return {}
@@ -246,7 +244,7 @@ class Qwen3CoderToolParser(ToolParser):
return param_value
def _parse_xml_function_call(
- self, function_call_str: str, tools: list[ChatCompletionToolsParam] | None
+ self, function_call_str: str, tools: list[Tool] | None
) -> ToolCall | None:
# Extract function name
end_index = function_call_str.find(">")
diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py
index f7dcf20ab..23778091e 100644
--- a/vllm/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/tool_parsers/qwen3xml_tool_parser.py
@@ -11,7 +11,6 @@ import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -24,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -40,7 +40,7 @@ class StreamingXMLToolCallParser:
self.reset_streaming_state()
# Tool configuration information
- self.tools: list[ChatCompletionToolsParam] | None = None
+ self.tools: list[Tool] | None = None
self.tool_call_start_token: str = "<tool_call>"
self.tool_call_end_token: str = "</tool_call>"
self.function_start_token: str = " ToolCall | None:
def get_arguments_config(func_name: str) -> dict:
if tools is None:
diff --git a/vllm/tool_parsers/step3_tool_parser.py b/vllm/tool_parsers/step3_tool_parser.py
index 8e6f27907..a9c569587 100644
--- a/vllm/tool_parsers/step3_tool_parser.py
+++ b/vllm/tool_parsers/step3_tool_parser.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils import random_uuid
@@ -43,8 +44,8 @@ class Step3ToolParser(ToolParser):
TOOL_SEP = "<|tool_sep|>"
SPECIAL_TOKENS = [TOOL_CALLS_BEGIN, TOOL_CALLS_END, TOOL_CALL_BEGIN, TOOL_CALL_END]
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.position = 0
# Explicit state flags for robust streaming
self.tool_block_started = False
diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py
index 4441cd74e..25b310f2a 100644
--- a/vllm/tool_parsers/step3p5_tool_parser.py
+++ b/vllm/tool_parsers/step3p5_tool_parser.py
@@ -11,7 +11,6 @@ import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -23,7 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -38,7 +37,7 @@ class StreamingXMLToolCallParser:
self.reset_streaming_state()
# Tool configuration information
- self.tools: list[ChatCompletionToolsParam] | None = None
+ self.tools: list[Tool] | None = None
self.tool_call_start_token: str = ""
self.tool_call_end_token: str = ""
self.function_start_token: str = "