diff --git a/tests/entrypoints/openai/test_chat_error.py b/tests/entrypoints/openai/test_chat_error.py index 7b15421fb..de5d96d5b 100644 --- a/tests/entrypoints/openai/test_chat_error.py +++ b/tests/entrypoints/openai/test_chat_error.py @@ -36,6 +36,7 @@ class MockHFConfig: class MockModelConfig: task = "generate" runner_type = "generate" + model = MODEL_NAME tokenizer = MODEL_NAME trust_remote_code = False tokenizer_mode = "auto" diff --git a/tests/entrypoints/openai/test_completion_error.py b/tests/entrypoints/openai/test_completion_error.py index 01c4e567c..b60397cd7 100644 --- a/tests/entrypoints/openai/test_completion_error.py +++ b/tests/entrypoints/openai/test_completion_error.py @@ -36,6 +36,7 @@ class MockHFConfig: class MockModelConfig: task = "generate" runner_type = "generate" + model = MODEL_NAME tokenizer = MODEL_NAME trust_remote_code = False tokenizer_mode = "auto" diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index b966e7dd7..4365075f6 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -511,6 +511,7 @@ class MockHFConfig: class MockModelConfig: task = "generate" runner_type = "generate" + model = MODEL_NAME tokenizer = MODEL_NAME trust_remote_code = False tokenizer_mode = "auto" diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index e618b11ad..21bc0f442 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt from vllm.logger import init_logger from vllm.logprobs import Logprob from vllm.outputs import CompletionOutput, RequestOutput +from vllm.parser import ParserManager from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.tokenizers import TokenizerLike from vllm.tokenizers.mistral import ( @@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing): self.logits_processors = self.model_config.logits_processors # set up reasoning parser - self.reasoning_parser = self._get_reasoning_parser( + self.reasoning_parser = ParserManager.get_reasoning_parser( reasoning_parser_name=reasoning_parser ) # set up tool use self.enable_auto_tools: bool = enable_auto_tools - self.tool_parser = self._get_tool_parser( - tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools + self.tool_parser = ParserManager.get_tool_parser( + tool_parser_name=tool_parser, + enable_auto_tools=enable_auto_tools, + model_name=self.model_config.model, ) self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py index 7f9300a1a..801c7dcd5 100644 --- a/vllm/entrypoints/openai/engine/serving.py +++ b/vllm/entrypoints/openai/engine/serving.py @@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest from vllm.multimodal import MultiModalDataDict from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput from vllm.pooling_params import PoolingParams -from vllm.reasoning import ReasoningParser, ReasoningParserManager from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers import ToolParser, ToolParserManager +from vllm.tool_parsers import ToolParser from vllm.tracing import ( contains_trace_headers, extract_trace_headers, @@ -246,46 +245,6 @@ class OpenAIServing: self.model_config = self.models.model_config self.max_model_len = self.model_config.max_model_len - def _get_tool_parser( - self, tool_parser_name: str | None = None, enable_auto_tools: bool = False - ) -> Callable[[TokenizerLike], ToolParser] | None: - """Get the tool parser based on the name.""" - parser = None - if not enable_auto_tools or tool_parser_name is None: - return parser - logger.info('"auto" tool choice has been enabled.') - - try: - if tool_parser_name == "pythonic" and self.model_config.model.startswith( - "meta-llama/Llama-3.2" - ): - logger.warning( - "Llama3.2 models may struggle to emit valid pythonic tool calls" - ) - parser = ToolParserManager.get_tool_parser(tool_parser_name) - except Exception as e: - raise TypeError( - "Error: --enable-auto-tool-choice requires " - f"tool_parser:'{tool_parser_name}' which has not " - "been registered" - ) from e - return parser - - def _get_reasoning_parser( - self, - reasoning_parser_name: str, - ) -> Callable[[TokenizerLike], ReasoningParser] | None: - """Get the reasoning parser based on the name.""" - parser = None - if not reasoning_parser_name: - return None - try: - parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name) - assert parser is not None - except Exception as e: - raise TypeError(f"{reasoning_parser_name=} has not been registered") from e - return parser - async def beam_search( self, prompt: PromptType, diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index cd6aa48c3..32cce3ef4 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -123,6 +123,7 @@ from vllm.logger import init_logger from vllm.logprobs import Logprob as SampleLogprob from vllm.logprobs import SampleLogprobs from vllm.outputs import CompletionOutput +from vllm.parser import ParserManager from vllm.sampling_params import SamplingParams, StructuredOutputsParams from vllm.tokenizers import TokenizerLike from vllm.utils import random_uuid @@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing): self.chat_template_content_format: Final = chat_template_content_format self.enable_log_outputs = enable_log_outputs - self.reasoning_parser = self._get_reasoning_parser( - reasoning_parser_name=reasoning_parser + # Set up the unified parser - either a unified parser or fall back to + # separate parsers accessed through the parser interface + self.parser = ParserManager.get_parser( + tool_parser_name=tool_parser, + reasoning_parser_name=reasoning_parser, + enable_auto_tools=enable_auto_tools, + model_name=self.model_config.model, ) self.enable_prompt_tokens_details = enable_prompt_tokens_details self.enable_force_include_usage = enable_force_include_usage @@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing): self.tool_call_id_type = "random" self.enable_auto_tools = enable_auto_tools - # set up tool use - self.tool_parser = self._get_tool_parser( - tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools - ) # HACK(woosuk): This is a hack. We should use a better store. # FIXME: If enable_store=True, this may cause a memory leak since we # never remove responses from the store. @@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing): context = ParsableContext( response_messages=messages, tokenizer=tokenizer, - reasoning_parser_cls=self.reasoning_parser, + reasoning_parser_cls=self.parser.reasoning_parser_cls + if self.parser + else None, request=request, - tool_parser_cls=self.tool_parser, + tool_parser_cls=self.parser.tool_parser_cls + if self.parser + else None, available_tools=available_tools, chat_template=self.chat_template, chat_template_content_format=self.chat_template_content_format, @@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing): else: context = SimpleContext() - if self.reasoning_parser is not None: - reasoning_parser = self.reasoning_parser(tokenizer) + if self.parser and self.parser.reasoning_parser_cls is not None: + reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) if ( isinstance( struct_out := sampling_params.structured_outputs, @@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing): default_template_content_format=self.chat_template_content_format, default_template_kwargs=None, tool_dicts=tool_dicts, - tool_parser=self.tool_parser, + tool_parser=self.parser.tool_parser_cls if self.parser else None, ) return messages, engine_prompts @@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing): final_output: CompletionOutput, tokenizer: TokenizerLike, ) -> list[ResponseOutputItem]: - if self.reasoning_parser: + if self.parser and self.parser.reasoning_parser_cls: try: - reasoning_parser = self.reasoning_parser(tokenizer) + reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) except RuntimeError as e: logger.exception("Error in reasoning parser creation.") raise e @@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing): tokenizer=tokenizer, content=content, enable_auto_tools=self.enable_auto_tools, - tool_parser_cls=self.tool_parser, + tool_parser_cls=self.parser.tool_parser_cls if self.parser else None, ) if content or (self.use_harmony and tool_calls): @@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing): current_output_index = 0 current_item_id = "" reasoning_parser = None - if self.reasoning_parser: - reasoning_parser = self.reasoning_parser(tokenizer) + if self.parser and self.parser.reasoning_parser_cls: + reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) previous_text = "" previous_token_ids: list[int] = [] first_delta_sent = False diff --git a/vllm/parser/__init__.py b/vllm/parser/__init__.py new file mode 100644 index 000000000..8bce3e912 --- /dev/null +++ b/vllm/parser/__init__.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from vllm.parser.abstract_parser import ( + DelegatingParser, + Parser, + _WrappedParser, +) +from vllm.parser.parser_manager import ParserManager + +__all__ = [ + "Parser", + "DelegatingParser", + "ParserManager", + "_WrappedParser", +] + +_PARSERS_TO_REGISTER = { + "minimax_m2": ( # name + "minimax_m2_parser", # filename + "MiniMaxM2Parser", # class_name + ), +} + +# Register lazy parsers +ParserManager.register_lazy_module( + name="minimax_m2", + module_path="vllm.parser.minimax_m2_parser", + class_name="MiniMaxM2Parser", +) + + +def register_lazy_parsers(): + for name, (file_name, class_name) in _PARSERS_TO_REGISTER.items(): + module_path = f"vllm.parser.{file_name}" + ParserManager.register_lazy_module(name, module_path, class_name) + + +register_lazy_parsers() diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py new file mode 100644 index 000000000..f5cd1430a --- /dev/null +++ b/vllm/parser/abstract_parser.py @@ -0,0 +1,341 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from abc import abstractmethod +from collections.abc import Sequence +from functools import cached_property + +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, +) +from vllm.entrypoints.openai.engine.protocol import ( + DeltaMessage, + ExtractedToolCallInformation, +) +from vllm.entrypoints.openai.responses.protocol import ( + ResponsesRequest, +) +from vllm.reasoning.abs_reasoning_parsers import ReasoningParser +from vllm.tokenizers import TokenizerLike +from vllm.tool_parsers.abstract_tool_parser import ToolParser + + +class Parser: + """ + Abstract Parser class that unifies ReasoningParser and ToolParser into + a single interface for parsing model output. + + This class provides a unified way to handle both reasoning extraction + (e.g., chain-of-thought content in tags) and tool call extraction + (e.g., function calls in XML/JSON format) from model outputs. + + Subclasses can either: + 1. Override the abstract methods directly for custom parsing logic + 2. Set `reasoning_parser` and `tool_parser` properties to delegate to + existing parser implementations + + Class Attributes: + reasoning_parser_cls: The ReasoningParser class to use (for compatibility + with code that needs the class, not instance). + tool_parser_cls: The ToolParser class to use (for compatibility with + code that needs the class, not instance). + """ + + # Class-level parser classes for compatibility with existing patterns + # Subclasses should override these if they use specific parser classes + reasoning_parser_cls: type[ReasoningParser] | None = None + tool_parser_cls: type[ToolParser] | None = None + + def __init__(self, tokenizer: TokenizerLike, *args, **kwargs): + """ + Initialize the Parser. + + Args: + tokenizer: The tokenizer used by the model. This is required for + token-based parsing operations. + """ + self.model_tokenizer = tokenizer + self._reasoning_parser: ReasoningParser | None = None + self._tool_parser: ToolParser | None = None + + @cached_property + def vocab(self) -> dict[str, int]: + """Get the vocabulary mapping from tokens to IDs.""" + return self.model_tokenizer.get_vocab() + + @property + def reasoning_parser(self) -> ReasoningParser | None: + """The underlying reasoning parser, if any.""" + return self._reasoning_parser + + @reasoning_parser.setter + def reasoning_parser(self, parser: ReasoningParser | None) -> None: + self._reasoning_parser = parser + + @property + def tool_parser(self) -> ToolParser | None: + """The underlying tool parser, if any.""" + return self._tool_parser + + @tool_parser.setter + def tool_parser(self, parser: ToolParser | None) -> None: + self._tool_parser = parser + + # ========== Reasoning Parser Methods ========== + + @abstractmethod + def is_reasoning_end(self, input_ids: list[int]) -> bool: + """ + Check if the reasoning content ends in the input_ids. + + Used by structured engines like `xgrammar` to check if the + reasoning content ends in the model output. + + Args: + input_ids: The token IDs of the model output. + + Returns: + True if the reasoning content ends in the input_ids. + """ + + def is_reasoning_end_streaming( + self, input_ids: list[int], delta_ids: list[int] + ) -> bool: + """ + Check if the reasoning content ends during a decode step. + + Args: + input_ids: The entire model output token IDs. + delta_ids: The last few computed tokens at the current decode step. + + Returns: + True if the reasoning content ends in the delta_ids. + """ + return self.is_reasoning_end(input_ids) + + @abstractmethod + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + """ + Extract content token IDs from the input_ids. + + This extracts the non-reasoning content (e.g., everything after + the tag). + + Args: + input_ids: The token IDs of the model output. + + Returns: + The extracted content token IDs. + """ + + @abstractmethod + def extract_reasoning( + self, + model_output: str, + request: ChatCompletionRequest | ResponsesRequest, + ) -> tuple[str | None, str | None]: + """ + Extract reasoning content from a complete model-generated string. + + Used for non-streaming responses where we have the entire model + response available before sending to the client. + + Args: + model_output: The complete model-generated string. + request: The request object used to generate the output. + + Returns: + A tuple of (reasoning_content, response_content). + """ + + @abstractmethod + def extract_reasoning_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + ) -> DeltaMessage | None: + """ + Extract reasoning content from a streaming delta message. + + Args: + previous_text: Text from all previous tokens. + current_text: Text including the current delta. + delta_text: The new text in this delta. + previous_token_ids: Token IDs from previous generation. + current_token_ids: All token IDs including current. + delta_token_ids: The new token IDs in this delta. + + Returns: + A DeltaMessage with reasoning and/or content fields, or None. + """ + + # ========== Tool Parser Methods ========== + + def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest: + """ + Adjust the request parameters for tool calling. + + Can be overridden by subclasses to modify request parameters + (e.g., setting structured output schemas for tool calling). + + Args: + request: The original request. + + Returns: + The adjusted request. + """ + return request + + @abstractmethod + def extract_tool_calls( + self, + model_output: str, + request: ChatCompletionRequest, + ) -> ExtractedToolCallInformation: + """ + Extract tool calls from a complete model-generated string. + + Used for non-streaming responses. + + Args: + model_output: The complete model-generated string. + request: The request object used to generate the output. + + Returns: + ExtractedToolCallInformation containing the tool calls. + """ + + @abstractmethod + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + """ + Extract tool calls from a streaming delta message. + + Args: + previous_text: Text from all previous tokens. + current_text: Text including the current delta. + delta_text: The new text in this delta. + previous_token_ids: Token IDs from previous generation. + current_token_ids: All token IDs including current. + delta_token_ids: The new token IDs in this delta. + request: The request object. + + Returns: + A DeltaMessage with tool_calls field, or None. + """ + + +class DelegatingParser(Parser): + """ + A Parser implementation that delegates to separate ReasoningParser and + ToolParser instances. + + This is the recommended base class for creating model-specific parsers + that combine existing reasoning and tool parser implementations. + Subclasses should set `self._reasoning_parser` and `self._tool_parser` + in their `__init__` method. + + If either parser is None, the corresponding methods will return default + values (no reasoning extraction, no tool calls). + """ + + def extract_reasoning( + self, + model_output: str, + request: ChatCompletionRequest | ResponsesRequest, + ) -> tuple[str | None, str | None]: + if self._reasoning_parser is None: + return None, model_output + return self._reasoning_parser.extract_reasoning(model_output, request) + + def extract_reasoning_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + ) -> DeltaMessage | None: + if self._reasoning_parser is None: + return DeltaMessage(content=delta_text) + return self._reasoning_parser.extract_reasoning_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + ) + + def extract_tool_calls( + self, + model_output: str, + request: ChatCompletionRequest, + ) -> ExtractedToolCallInformation: + if self._tool_parser is None: + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + return self._tool_parser.extract_tool_calls(model_output, request) + + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + if self._tool_parser is None: + return None + return self._tool_parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request, + ) + + +class _WrappedParser(DelegatingParser): + """ + A DelegatingParser subclass that instantiates parsers from class attributes. + + This class is used to dynamically create a parser that wraps individual + ReasoningParser and ToolParser classes. The class attributes + `reasoning_parser_cls` and `tool_parser_cls` should be set before + instantiation. + + Usage: + _WrappedParser.reasoning_parser_cls = MyReasoningParser + _WrappedParser.tool_parser_cls = MyToolParser + parser = _WrappedParser(tokenizer) + """ + + reasoning_parser_cls: type[ReasoningParser] | None = None + tool_parser_cls: type[ToolParser] | None = None + + def __init__(self, tokenizer: TokenizerLike): + super().__init__(tokenizer) + # Instantiate the underlying parsers from class attributes + if self.__class__.reasoning_parser_cls is not None: + self._reasoning_parser = self.__class__.reasoning_parser_cls(tokenizer) + if self.__class__.tool_parser_cls is not None: + self._tool_parser = self.__class__.tool_parser_cls(tokenizer) diff --git a/vllm/parser/minimax_m2_parser.py b/vllm/parser/minimax_m2_parser.py new file mode 100644 index 000000000..ee092d4f5 --- /dev/null +++ b/vllm/parser/minimax_m2_parser.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +""" +MiniMax M2 Parser - A unified parser for MiniMax M2 models. + +This parser combines the existing MiniMaxM2ReasoningParser and +MinimaxM2ToolParser into a single unified interface by delegating +to those implementations. +""" + +from vllm.logger import init_logger +from vllm.parser.abstract_parser import DelegatingParser +from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser +from vllm.tokenizers import TokenizerLike +from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser + +logger = init_logger(__name__) + + +class MiniMaxM2Parser(DelegatingParser): + """ + Unified parser for MiniMax M2 models that handles both reasoning + extraction and tool call parsing. + + This parser delegates to the existing implementations: + - MiniMaxM2ReasoningParser for reasoning extraction + - MinimaxM2ToolParser for tool call parsing + + MiniMax M2 models have two special behaviors: + 1. Reasoning: They don't generate start token, only end + token. All content before is reasoning, content after is the + actual response. + 2. Tool Calls: They use ... tags + with ... and ... + syntax. + """ + + # Class-level parser classes for compatibility + reasoning_parser_cls = MiniMaxM2ReasoningParser + tool_parser_cls = MinimaxM2ToolParser + + def __init__(self, tokenizer: TokenizerLike): + super().__init__(tokenizer) + + # Initialize the underlying parsers + self._reasoning_parser = MiniMaxM2ReasoningParser(tokenizer) + self._tool_parser = MinimaxM2ToolParser(tokenizer) + + logger.debug( + "vLLM Successfully initialized parser %s!", self.__class__.__name__ + ) diff --git a/vllm/parser/parser_manager.py b/vllm/parser/parser_manager.py new file mode 100644 index 000000000..4331eba98 --- /dev/null +++ b/vllm/parser/parser_manager.py @@ -0,0 +1,308 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from __future__ import annotations + +import importlib +import os +from collections.abc import Callable +from typing import TYPE_CHECKING + +from vllm.logger import init_logger +from vllm.utils.collection_utils import is_list_of +from vllm.utils.import_utils import import_from_path + +if TYPE_CHECKING: + from vllm.parser.abstract_parser import Parser + from vllm.reasoning import ReasoningParser + from vllm.tool_parsers import ToolParser + +logger = init_logger(__name__) + + +class ParserManager: + """ + Central registry for Parser implementations. + + Supports two registration modes: + - Eager registration via `register_module` + - Lazy registration via `register_lazy_module` + """ + + parsers: dict[str, type[Parser]] = {} + lazy_parsers: dict[str, tuple[str, str]] = {} # name -> (module_path, class_name) + + @classmethod + def get_parser_internal(cls, name: str) -> type[Parser]: + """ + Retrieve a registered or lazily registered Parser class. + + Args: + name: The registered name of the parser. + + Returns: + The Parser class. + + Raises: + KeyError: If no parser is found under the given name. + """ + if name in cls.parsers: + return cls.parsers[name] + + if name in cls.lazy_parsers: + return cls._load_lazy_parser(name) + + registered = ", ".join(cls.list_registered()) + raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}") + + @classmethod + def _load_lazy_parser(cls, name: str) -> type[Parser]: + """Import and register a lazily loaded parser.""" + from vllm.parser.abstract_parser import Parser + + module_path, class_name = cls.lazy_parsers[name] + try: + mod = importlib.import_module(module_path) + parser_cls = getattr(mod, class_name) + if not issubclass(parser_cls, Parser): + raise TypeError( + f"{class_name} in {module_path} is not a Parser subclass." + ) + cls.parsers[name] = parser_cls # cache + return parser_cls + except Exception as e: + logger.exception( + "Failed to import lazy parser '%s' from %s: %s", + name, + module_path, + e, + ) + raise + + @classmethod + def _register_module( + cls, + module: type[Parser], + module_name: str | list[str] | None = None, + force: bool = True, + ) -> None: + """Register a Parser class immediately.""" + from vllm.parser.abstract_parser import Parser + + if not issubclass(module, Parser): + raise TypeError( + f"module must be subclass of Parser, but got {type(module)}" + ) + + if module_name is None: + module_names = [module.__name__] + elif isinstance(module_name, str): + module_names = [module_name] + elif is_list_of(module_name, str): + module_names = module_name + else: + raise TypeError("module_name must be str, list[str], or None.") + + for name in module_names: + if not force and name in cls.parsers: + existed = cls.parsers[name] + raise KeyError(f"{name} is already registered at {existed.__module__}") + cls.parsers[name] = module + + @classmethod + def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None: + """ + Register a lazy module mapping for delayed import. + + Example: + ParserManager.register_lazy_module( + name="minimax_m2", + module_path="vllm.parser.minimax_m2_parser", + class_name="MiniMaxM2Parser", + ) + """ + cls.lazy_parsers[name] = (module_path, class_name) + + @classmethod + def register_module( + cls, + name: str | list[str] | None = None, + force: bool = True, + module: type[Parser] | None = None, + ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]: + """ + Register a Parser class. + + Can be used as a decorator or called directly. + + Usage: + @ParserManager.register_module("my_parser") + class MyParser(Parser): + ... + + Or: + ParserManager.register_module(module=MyParser) + """ + if not isinstance(force, bool): + raise TypeError(f"force must be a boolean, but got {type(force)}") + + # Immediate registration + if module is not None: + cls._register_module(module=module, module_name=name, force=force) + return module + + # Decorator usage + def _decorator(obj: type[Parser]) -> type[Parser]: + module_path = obj.__module__ + class_name = obj.__name__ + + if isinstance(name, str): + names = [name] + elif is_list_of(name, str): + names = name + else: + names = [class_name] + + for n in names: + cls.lazy_parsers[n] = (module_path, class_name) + + return obj + + return _decorator + + @classmethod + def list_registered(cls) -> list[str]: + """Return names of all registered parsers.""" + return sorted(set(cls.parsers.keys()) | set(cls.lazy_parsers.keys())) + + @classmethod + def import_parser(cls, plugin_path: str) -> None: + """Import a user-defined parser from an arbitrary path.""" + module_name = os.path.splitext(os.path.basename(plugin_path))[0] + try: + import_from_path(module_name, plugin_path) + except Exception: + logger.exception( + "Failed to load module '%s' from %s.", module_name, plugin_path + ) + + @classmethod + def get_tool_parser( + cls, + tool_parser_name: str | None = None, + enable_auto_tools: bool = False, + model_name: str | None = None, + ) -> type[ToolParser] | None: + """Get the tool parser based on the name.""" + from vllm.tool_parsers import ToolParserManager + + parser: type[ToolParser] | None = None + if not enable_auto_tools or tool_parser_name is None: + return parser + logger.info('"auto" tool choice has been enabled.') + + try: + if ( + tool_parser_name == "pythonic" + and model_name + and model_name.startswith("meta-llama/Llama-3.2") + ): + logger.warning( + "Llama3.2 models may struggle to emit valid pythonic tool calls" + ) + parser = ToolParserManager.get_tool_parser(tool_parser_name) + except Exception as e: + raise TypeError( + "Error: --enable-auto-tool-choice requires " + f"tool_parser:'{tool_parser_name}' which has not " + "been registered" + ) from e + return parser + + @classmethod + def get_reasoning_parser( + cls, + reasoning_parser_name: str | None, + ) -> type[ReasoningParser] | None: + """Get the reasoning parser based on the name.""" + from vllm.reasoning import ReasoningParserManager + + parser: type[ReasoningParser] | None = None + if not reasoning_parser_name: + return None + try: + parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name) + assert parser is not None + except Exception as e: + raise TypeError(f"{reasoning_parser_name=} has not been registered") from e + return parser + + @classmethod + def get_parser( + cls, + tool_parser_name: str | None = None, + reasoning_parser_name: str | None = None, + enable_auto_tools: bool = False, + model_name: str | None = None, + ) -> type[Parser] | None: + """ + Get a unified Parser that handles both reasoning and tool parsing. + + This method checks if a unified Parser exists that can handle both + reasoning extraction and tool call parsing. If no unified parser + exists, it creates a DelegatingParser that wraps the individual + reasoning and tool parsers. + + Args: + tool_parser_name: The name of the tool parser. + reasoning_parser_name: The name of the reasoning parser. + enable_auto_tools: Whether auto tool choice is enabled. + model_name: The model name for parser-specific warnings. + + Returns: + A Parser class, or None if neither parser is specified. + """ + from vllm.parser.abstract_parser import _WrappedParser + + if not tool_parser_name and not reasoning_parser_name: + return None + + # Strategy 1: If both names match, check for a unified parser with that name + if tool_parser_name and tool_parser_name == reasoning_parser_name: + try: + parser = cls.get_parser_internal(tool_parser_name) + logger.info( + "Using unified parser '%s' for both reasoning and tool parsing.", + tool_parser_name, + ) + return parser + except KeyError: + pass # No unified parser with this name + + # Strategy 2: Check for parser with either name + for name in [tool_parser_name, reasoning_parser_name]: + if name: + try: + parser = cls.get_parser_internal(name) + logger.info( + "Using unified parser '%s' for reasoning and tool parsing.", + name, + ) + return parser + except KeyError: + pass + + # Strategy 3: Create a DelegatingParser with the individual parser classes + reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name) + tool_parser_cls = cls.get_tool_parser( + tool_parser_name, enable_auto_tools, model_name + ) + + if reasoning_parser_cls is None and tool_parser_cls is None: + return None + + # Set the class-level attributes on the imported _WrappedParser + _WrappedParser.reasoning_parser_cls = reasoning_parser_cls + _WrappedParser.tool_parser_cls = tool_parser_cls + + return _WrappedParser