diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 135aaf13c..493c26d3a 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -565,7 +565,7 @@ class OpenAIServingChat(OpenAIServing): ) tool_parsers: list[ToolParser | None] = [ - self.tool_parser(tokenizer) + self.tool_parser(tokenizer, request.tools) ] * num_choices else: tool_parsers = [None] * num_choices @@ -1331,7 +1331,7 @@ class OpenAIServingChat(OpenAIServing): "Tokenizer not available when `skip_tokenizer_init=True`" ) - tool_parser = self.tool_parser(tokenizer) + tool_parser = self.tool_parser(tokenizer, request.tools) # NOTE: We use token_ids for openai tool parser tool_call_info = tool_parser.extract_tool_calls( "", diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py index d8df1d3c4..eb7e00344 100644 --- a/vllm/entrypoints/openai/engine/serving.py +++ b/vllm/entrypoints/openai/engine/serving.py @@ -925,7 +925,7 @@ class OpenAIServing: # Automatic Tool Call Parsing try: - tool_parser = tool_parser_cls(tokenizer) + tool_parser = tool_parser_cls(tokenizer, request.tools) except RuntimeError as e: logger.exception("Error in tool parser creation.") raise e diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py index e3d7c588a..a31f20501 100644 --- a/vllm/entrypoints/openai/parser/responses_parser.py +++ b/vllm/entrypoints/openai/parser/responses_parser.py @@ -52,7 +52,7 @@ class ResponsesParser: self.reasoning_parser_instance = reasoning_parser_cls(tokenizer) self.tool_parser_instance = None if tool_parser_cls is not None: - self.tool_parser_instance = tool_parser_cls(tokenizer) + self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools) # Store the last finish_reason to determine response status self.finish_reason: str | None = None diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index e71a62461..df94848e3 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1344,7 +1344,7 @@ class OpenAIServingResponses(OpenAIServing): reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) tool_parser = None if self.parser and self.parser.tool_parser_cls: - tool_parser = self.parser.tool_parser_cls(tokenizer) + tool_parser = self.parser.tool_parser_cls(tokenizer, request.tools) reasoning_ended = False tool_call_text_started = False previous_text = "" diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py index 6009b666d..52f03447d 100644 --- a/vllm/entrypoints/serve/render/serving.py +++ b/vllm/entrypoints/serve/render/serving.py @@ -545,6 +545,8 @@ class OpenAIServingRender: ) raise NotImplementedError(msg) tokenizer = renderer.get_tokenizer() - request = tool_parser(tokenizer).adjust_request(request=request) # type: ignore[arg-type] + request = tool_parser(tokenizer, request.tools).adjust_request( + request=request # type: ignore[arg-type] + ) return conversation, [engine_input] diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index a2c2f0627..dcfe45d38 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -5,13 +5,18 @@ import importlib import os from collections.abc import Callable, Sequence from functools import cached_property +from typing import TypeAlias from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, ResponseTextConfig, ) +from openai.types.responses.tool import Tool as ResponsesTool -from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, ExtractedToolCallInformation, @@ -30,6 +35,8 @@ from vllm.utils.import_utils import import_from_path logger = init_logger(__name__) +Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool + class ToolParser: """ @@ -38,7 +45,11 @@ class ToolParser: derived classes. """ - def __init__(self, tokenizer: TokenizerLike): + def __init__( + self, + tokenizer: TokenizerLike, + tools: list[Tool] | None = None, + ): self.prev_tool_call_arr: list[dict] = [] # the index of the tool call that is currently being parsed self.current_tool_id: int = -1 @@ -46,6 +57,7 @@ class ToolParser: self.streamed_args_for_tool: list[str] = [] self.model_tokenizer = tokenizer + self.tools = tools @cached_property def vocab(self) -> dict[str, int]: diff --git a/vllm/tool_parsers/deepseekv31_tool_parser.py b/vllm/tool_parsers/deepseekv31_tool_parser.py index ad42bb771..e4ade3aae 100644 --- a/vllm/tool_parsers/deepseekv31_tool_parser.py +++ b/vllm/tool_parsers/deepseekv31_tool_parser.py @@ -19,14 +19,14 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser +from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser logger = init_logger(__name__) class DeepSeekV31ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.current_tool_name_sent: bool = False self.prev_tool_call_arr: list[dict] = [] diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index cb39a16fd..e86929944 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -43,8 +44,8 @@ class DeepSeekV32ToolParser(ToolParser): """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.prev_tool_call_arr: list[dict] = [] diff --git a/vllm/tool_parsers/deepseekv3_tool_parser.py b/vllm/tool_parsers/deepseekv3_tool_parser.py index 83bba1c87..e92af87e6 100644 --- a/vllm/tool_parsers/deepseekv3_tool_parser.py +++ b/vllm/tool_parsers/deepseekv3_tool_parser.py @@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -27,8 +28,8 @@ logger = init_logger(__name__) class DeepSeekV3ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.current_tool_name_sent: bool = False self.prev_tool_call_arr: list[dict] = [] diff --git a/vllm/tool_parsers/ernie45_tool_parser.py b/vllm/tool_parsers/ernie45_tool_parser.py index d5dc7a3da..9722dddf7 100644 --- a/vllm/tool_parsers/ernie45_tool_parser.py +++ b/vllm/tool_parsers/ernie45_tool_parser.py @@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -27,12 +28,12 @@ logger = init_logger(__name__) class Ernie45ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): """ Ernie thinking model format: abc\n\n\n\n\ndef\n\n """ - super().__init__(tokenizer) + super().__init__(tokenizer, tools) self.current_tool_name_sent = False self.prev_tool_call_arr: list[dict] = [] self.current_tool_id = -1 diff --git a/vllm/tool_parsers/functiongemma_tool_parser.py b/vllm/tool_parsers/functiongemma_tool_parser.py index 599019b1b..dfd91d974 100644 --- a/vllm/tool_parsers/functiongemma_tool_parser.py +++ b/vllm/tool_parsers/functiongemma_tool_parser.py @@ -20,7 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser +from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser logger = init_logger(__name__) @@ -33,8 +33,8 @@ class FunctionGemmaToolParser(ToolParser): call:func_name{param:value} """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # Streaming state self.current_tool_name_sent: bool = False diff --git a/vllm/tool_parsers/gigachat3_tool_parser.py b/vllm/tool_parsers/gigachat3_tool_parser.py index 90928f9ae..f470f6a5b 100644 --- a/vllm/tool_parsers/gigachat3_tool_parser.py +++ b/vllm/tool_parsers/gigachat3_tool_parser.py @@ -20,7 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser +from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser logger = init_logger(__name__) @@ -46,8 +46,8 @@ ARGS_REGEX = re.compile( class GigaChat3ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.tool_started: bool = False self.tool_name_sent: bool = False self.tool_id: str | None = None diff --git a/vllm/tool_parsers/glm47_moe_tool_parser.py b/vllm/tool_parsers/glm47_moe_tool_parser.py index 8c72342d7..765d6d37d 100644 --- a/vllm/tool_parsers/glm47_moe_tool_parser.py +++ b/vllm/tool_parsers/glm47_moe_tool_parser.py @@ -16,14 +16,15 @@ import regex as re from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike +from vllm.tool_parsers.abstract_tool_parser import Tool from vllm.tool_parsers.glm4_moe_tool_parser import Glm4MoeModelToolParser logger = init_logger(__name__) class Glm47MoeModelToolParser(Glm4MoeModelToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # GLM-4.7 format: func_name[...]* # The function name can be followed by a newline, whitespace, or # directly by tags (no separator). The arg section is diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py index 28d86b68b..fc718921d 100644 --- a/vllm/tool_parsers/glm4_moe_tool_parser.py +++ b/vllm/tool_parsers/glm4_moe_tool_parser.py @@ -21,7 +21,6 @@ import regex as re from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -34,6 +33,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -48,8 +48,8 @@ class Glm4MoeModelToolParser(ToolParser): rather than waiting for the complete tag. """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # Stateful streaming fields self.current_tool_name_sent: bool = False self.prev_tool_call_arr: list[dict[str, Any]] = [] @@ -122,7 +122,7 @@ class Glm4MoeModelToolParser(ToolParser): def _is_string_type( tool_name: str, arg_name: str, - tools: list[ChatCompletionToolsParam] | None, + tools: list[Tool] | None, ) -> bool: if tools is None: return False diff --git a/vllm/tool_parsers/granite4_tool_parser.py b/vllm/tool_parsers/granite4_tool_parser.py index 693c4dc8f..3d58690f5 100644 --- a/vllm/tool_parsers/granite4_tool_parser.py +++ b/vllm/tool_parsers/granite4_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -43,8 +44,8 @@ FuncT = TypeVar("FuncT", bound=_FunctionCallCtor) class Granite4ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.prev_tool_call_arr: list[dict] = [] self.current_tool_id: int = -1 diff --git a/vllm/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/tool_parsers/granite_20b_fc_tool_parser.py index 7fe3c39f7..6d217a030 100644 --- a/vllm/tool_parsers/granite_20b_fc_tool_parser.py +++ b/vllm/tool_parsers/granite_20b_fc_tool_parser.py @@ -24,6 +24,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -46,8 +47,8 @@ class Granite20bFCToolParser(ToolParser): are all set """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.bot_token = "" self.tool_start_token = self.bot_token diff --git a/vllm/tool_parsers/granite_tool_parser.py b/vllm/tool_parsers/granite_tool_parser.py index 7cad01e16..d586db326 100644 --- a/vllm/tool_parsers/granite_tool_parser.py +++ b/vllm/tool_parsers/granite_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -44,8 +45,8 @@ class GraniteToolParser(ToolParser): are all set """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # for granite 3.0, the token `<|tool_call|>` self.bot_token = "<|tool_call|>" # for granite 3.1, the string `` diff --git a/vllm/tool_parsers/hermes_tool_parser.py b/vllm/tool_parsers/hermes_tool_parser.py index 5bde5b2c0..cca2bf9a0 100644 --- a/vllm/tool_parsers/hermes_tool_parser.py +++ b/vllm/tool_parsers/hermes_tool_parser.py @@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.utils.mistral import is_mistral_tokenizer @@ -31,8 +32,8 @@ logger = init_logger(__name__) class Hermes2ProToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) if is_mistral_tokenizer(tokenizer): logger.error("Detected Mistral tokenizer when using a Hermes model") diff --git a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py index 4f446bfcc..29b2a5eae 100644 --- a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py +++ b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import consume_space @@ -31,8 +32,8 @@ logger = init_logger(__name__) class HunyuanA13BToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # Initialize state for streaming mode self.prev_tool_calls: list[dict] = [] diff --git a/vllm/tool_parsers/internlm2_tool_parser.py b/vllm/tool_parsers/internlm2_tool_parser.py index 3b858f34c..fc7c44cff 100644 --- a/vllm/tool_parsers/internlm2_tool_parser.py +++ b/vllm/tool_parsers/internlm2_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import extract_intermediate_diff @@ -30,8 +31,8 @@ logger = init_logger(__name__) class Internlm2ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.position = 0 def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest: diff --git a/vllm/tool_parsers/jamba_tool_parser.py b/vllm/tool_parsers/jamba_tool_parser.py index 98293a4c1..5a9af9910 100644 --- a/vllm/tool_parsers/jamba_tool_parser.py +++ b/vllm/tool_parsers/jamba_tool_parser.py @@ -22,7 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers import ToolParser +from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser from vllm.tool_parsers.utils import extract_intermediate_diff from vllm.utils.mistral import is_mistral_tokenizer @@ -30,8 +30,8 @@ logger = init_logger(__name__) class JambaToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) if is_mistral_tokenizer(self.model_tokenizer): raise ValueError( diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index ed4795215..bc995319e 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -27,8 +28,8 @@ logger = init_logger(__name__) class KimiK2ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.current_tool_name_sent: bool = False self.prev_tool_call_arr: list[dict] = [] self.current_tool_id: int = -1 diff --git a/vllm/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/tool_parsers/llama4_pythonic_tool_parser.py index 93807196d..1921187e9 100644 --- a/vllm/tool_parsers/llama4_pythonic_tool_parser.py +++ b/vllm/tool_parsers/llama4_pythonic_tool_parser.py @@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -47,8 +48,12 @@ class Llama4PythonicToolParser(ToolParser): re.DOTALL, ) - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + tools: list[Tool] | None = None, + ): + super().__init__(tokenizer, tools) # Rename for readability. This is NOT a tool id. @property diff --git a/vllm/tool_parsers/llama_tool_parser.py b/vllm/tool_parsers/llama_tool_parser.py index 527d3f735..be3d47acd 100644 --- a/vllm/tool_parsers/llama_tool_parser.py +++ b/vllm/tool_parsers/llama_tool_parser.py @@ -24,6 +24,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -44,8 +45,12 @@ class Llama3JsonToolParser(ToolParser): llama4_json are set. """ - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + tools: list[Tool] | None = None, + ): + super().__init__(tokenizer, tools) # initialize properties used for state when parsing tool calls in # streaming mode diff --git a/vllm/tool_parsers/longcat_tool_parser.py b/vllm/tool_parsers/longcat_tool_parser.py index 72f13559a..0304f452e 100644 --- a/vllm/tool_parsers/longcat_tool_parser.py +++ b/vllm/tool_parsers/longcat_tool_parser.py @@ -4,12 +4,13 @@ import regex as re from vllm.tokenizers import TokenizerLike +from vllm.tool_parsers.abstract_tool_parser import Tool from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser class LongcatFlashToolParser(Hermes2ProToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.tool_call_start_token: str = "" self.tool_call_end_token: str = "" diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py index a9291adc1..6c75e0099 100644 --- a/vllm/tool_parsers/minimax_m2_tool_parser.py +++ b/vllm/tool_parsers/minimax_m2_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -29,8 +30,8 @@ logger = init_logger(__name__) class MinimaxM2ToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.prev_tool_call_arr: list[dict] = [] diff --git a/vllm/tool_parsers/minimax_tool_parser.py b/vllm/tool_parsers/minimax_tool_parser.py index cb5610fc7..2a2baa03b 100644 --- a/vllm/tool_parsers/minimax_tool_parser.py +++ b/vllm/tool_parsers/minimax_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import extract_intermediate_diff @@ -30,8 +31,8 @@ logger = init_logger(__name__) class MinimaxToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) # Initialize streaming state for tracking tool call progress self.streaming_state: dict[str, Any] = { diff --git a/vllm/tool_parsers/mistral_tool_parser.py b/vllm/tool_parsers/mistral_tool_parser.py index 56ba245ce..153c6ed32 100644 --- a/vllm/tool_parsers/mistral_tool_parser.py +++ b/vllm/tool_parsers/mistral_tool_parser.py @@ -26,6 +26,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.utils.mistral import is_mistral_tokenizer @@ -78,8 +79,8 @@ class MistralToolParser(ToolParser): Used when --enable-auto-tool-choice --tool-call-parser mistral are all set """ - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) if not is_mistral_tokenizer(self.model_tokenizer): logger.info("Non-Mistral tokenizer detected when using a Mistral model...") diff --git a/vllm/tool_parsers/olmo3_tool_parser.py b/vllm/tool_parsers/olmo3_tool_parser.py index dd63b1086..dcbf0a673 100644 --- a/vllm/tool_parsers/olmo3_tool_parser.py +++ b/vllm/tool_parsers/olmo3_tool_parser.py @@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -51,8 +52,12 @@ class Olmo3PythonicToolParser(ToolParser): re.DOTALL, ) - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + tools: list[Tool] | None = None, + ): + super().__init__(tokenizer, tools) # Rename for readability. This is NOT a tool id. @property diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index 76f7a49df..ee6dd7071 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -16,6 +16,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -28,8 +29,8 @@ logger = init_logger(__name__) class OpenAIToolParser(ToolParser): - def __init__(self, tokenizer: "TokenizerLike"): - super().__init__(tokenizer) + def __init__(self, tokenizer: "TokenizerLike", tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) def extract_tool_calls( self, diff --git a/vllm/tool_parsers/phi4mini_tool_parser.py b/vllm/tool_parsers/phi4mini_tool_parser.py index f222cffd6..2dc262bba 100644 --- a/vllm/tool_parsers/phi4mini_tool_parser.py +++ b/vllm/tool_parsers/phi4mini_tool_parser.py @@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -35,8 +36,12 @@ class Phi4MiniJsonToolParser(ToolParser): are all set """ - def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None: - super().__init__(tokenizer) + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + tools: list[Tool] | None = None, + ) -> None: + super().__init__(tokenizer, tools) # initialize properties used for state when parsing tool calls in # streaming mode diff --git a/vllm/tool_parsers/pythonic_tool_parser.py b/vllm/tool_parsers/pythonic_tool_parser.py index 9c9f3e183..540a65024 100644 --- a/vllm/tool_parsers/pythonic_tool_parser.py +++ b/vllm/tool_parsers/pythonic_tool_parser.py @@ -17,6 +17,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.tool_parsers.utils import ( @@ -49,8 +50,12 @@ class PythonicToolParser(ToolParser): re.DOTALL, ) - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + tools: list[Tool] | None = None, + ): + super().__init__(tokenizer, tools) # Rename for readability. This is NOT a tool id. @property diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 216ae163b..f9b406b53 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -10,7 +10,6 @@ import regex as re from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -23,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -30,8 +30,8 @@ logger = init_logger(__name__) class Qwen3CoderToolParser(ToolParser): - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.current_tool_name_sent: bool = False self.prev_tool_call_arr: list[dict] = [] @@ -109,9 +109,7 @@ class Qwen3CoderToolParser(ToolParser): self.accumulated_params = {} self.streaming_request = None - def _get_arguments_config( - self, func_name: str, tools: list[ChatCompletionToolsParam] | None - ) -> dict: + def _get_arguments_config(self, func_name: str, tools: list[Tool] | None) -> dict: """Extract argument configuration for a function.""" if tools is None: return {} @@ -246,7 +244,7 @@ class Qwen3CoderToolParser(ToolParser): return param_value def _parse_xml_function_call( - self, function_call_str: str, tools: list[ChatCompletionToolsParam] | None + self, function_call_str: str, tools: list[Tool] | None ) -> ToolCall | None: # Extract function name end_index = function_call_str.find(">") diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index f7dcf20ab..23778091e 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -11,7 +11,6 @@ import regex as re from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -24,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) @@ -40,7 +40,7 @@ class StreamingXMLToolCallParser: self.reset_streaming_state() # Tool configuration information - self.tools: list[ChatCompletionToolsParam] | None = None + self.tools: list[Tool] | None = None self.tool_call_start_token: str = "" self.tool_call_end_token: str = "" self.function_start_token: str = " ToolCall | None: def get_arguments_config(func_name: str) -> dict: if tools is None: diff --git a/vllm/tool_parsers/step3_tool_parser.py b/vllm/tool_parsers/step3_tool_parser.py index 8e6f27907..a9c569587 100644 --- a/vllm/tool_parsers/step3_tool_parser.py +++ b/vllm/tool_parsers/step3_tool_parser.py @@ -22,6 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.tool_parsers.abstract_tool_parser import ( + Tool, ToolParser, ) from vllm.utils import random_uuid @@ -43,8 +44,8 @@ class Step3ToolParser(ToolParser): TOOL_SEP = "<|tool_sep|>" SPECIAL_TOKENS = [TOOL_CALLS_BEGIN, TOOL_CALLS_END, TOOL_CALL_BEGIN, TOOL_CALL_END] - def __init__(self, tokenizer: TokenizerLike): - super().__init__(tokenizer) + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): + super().__init__(tokenizer, tools) self.position = 0 # Explicit state flags for robust streaming self.tool_block_started = False diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py index 4441cd74e..25b310f2a 100644 --- a/vllm/tool_parsers/step3p5_tool_parser.py +++ b/vllm/tool_parsers/step3p5_tool_parser.py @@ -11,7 +11,6 @@ import regex as re from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -23,7 +22,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ) from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser +from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser logger = init_logger(__name__) @@ -38,7 +37,7 @@ class StreamingXMLToolCallParser: self.reset_streaming_state() # Tool configuration information - self.tools: list[ChatCompletionToolsParam] | None = None + self.tools: list[Tool] | None = None self.tool_call_start_token: str = "" self.tool_call_end_token: str = "" self.function_start_token: str = "