diff --git a/tests/entrypoints/openai/test_chat_error.py b/tests/entrypoints/openai/test_chat_error.py
index 7b15421fb..de5d96d5b 100644
--- a/tests/entrypoints/openai/test_chat_error.py
+++ b/tests/entrypoints/openai/test_chat_error.py
@@ -36,6 +36,7 @@ class MockHFConfig:
class MockModelConfig:
task = "generate"
runner_type = "generate"
+ model = MODEL_NAME
tokenizer = MODEL_NAME
trust_remote_code = False
tokenizer_mode = "auto"
diff --git a/tests/entrypoints/openai/test_completion_error.py b/tests/entrypoints/openai/test_completion_error.py
index 01c4e567c..b60397cd7 100644
--- a/tests/entrypoints/openai/test_completion_error.py
+++ b/tests/entrypoints/openai/test_completion_error.py
@@ -36,6 +36,7 @@ class MockHFConfig:
class MockModelConfig:
task = "generate"
runner_type = "generate"
+ model = MODEL_NAME
tokenizer = MODEL_NAME
trust_remote_code = False
tokenizer_mode = "auto"
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index b966e7dd7..4365075f6 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -511,6 +511,7 @@ class MockHFConfig:
class MockModelConfig:
task = "generate"
runner_type = "generate"
+ model = MODEL_NAME
tokenizer = MODEL_NAME
trust_remote_code = False
tokenizer_mode = "auto"
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index e618b11ad..21bc0f442 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt
from vllm.logger import init_logger
from vllm.logprobs import Logprob
from vllm.outputs import CompletionOutput, RequestOutput
+from vllm.parser import ParserManager
from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers.mistral import (
@@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing):
self.logits_processors = self.model_config.logits_processors
# set up reasoning parser
- self.reasoning_parser = self._get_reasoning_parser(
+ self.reasoning_parser = ParserManager.get_reasoning_parser(
reasoning_parser_name=reasoning_parser
)
# set up tool use
self.enable_auto_tools: bool = enable_auto_tools
- self.tool_parser = self._get_tool_parser(
- tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
+ self.tool_parser = ParserManager.get_tool_parser(
+ tool_parser_name=tool_parser,
+ enable_auto_tools=enable_auto_tools,
+ model_name=self.model_config.model,
)
self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index 7f9300a1a..801c7dcd5 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest
from vllm.multimodal import MultiModalDataDict
from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
from vllm.pooling_params import PoolingParams
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers import ToolParser, ToolParserManager
+from vllm.tool_parsers import ToolParser
from vllm.tracing import (
contains_trace_headers,
extract_trace_headers,
@@ -246,46 +245,6 @@ class OpenAIServing:
self.model_config = self.models.model_config
self.max_model_len = self.model_config.max_model_len
- def _get_tool_parser(
- self, tool_parser_name: str | None = None, enable_auto_tools: bool = False
- ) -> Callable[[TokenizerLike], ToolParser] | None:
- """Get the tool parser based on the name."""
- parser = None
- if not enable_auto_tools or tool_parser_name is None:
- return parser
- logger.info('"auto" tool choice has been enabled.')
-
- try:
- if tool_parser_name == "pythonic" and self.model_config.model.startswith(
- "meta-llama/Llama-3.2"
- ):
- logger.warning(
- "Llama3.2 models may struggle to emit valid pythonic tool calls"
- )
- parser = ToolParserManager.get_tool_parser(tool_parser_name)
- except Exception as e:
- raise TypeError(
- "Error: --enable-auto-tool-choice requires "
- f"tool_parser:'{tool_parser_name}' which has not "
- "been registered"
- ) from e
- return parser
-
- def _get_reasoning_parser(
- self,
- reasoning_parser_name: str,
- ) -> Callable[[TokenizerLike], ReasoningParser] | None:
- """Get the reasoning parser based on the name."""
- parser = None
- if not reasoning_parser_name:
- return None
- try:
- parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
- assert parser is not None
- except Exception as e:
- raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
- return parser
-
async def beam_search(
self,
prompt: PromptType,
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index cd6aa48c3..32cce3ef4 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -123,6 +123,7 @@ from vllm.logger import init_logger
from vllm.logprobs import Logprob as SampleLogprob
from vllm.logprobs import SampleLogprobs
from vllm.outputs import CompletionOutput
+from vllm.parser import ParserManager
from vllm.sampling_params import SamplingParams, StructuredOutputsParams
from vllm.tokenizers import TokenizerLike
from vllm.utils import random_uuid
@@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing):
self.chat_template_content_format: Final = chat_template_content_format
self.enable_log_outputs = enable_log_outputs
- self.reasoning_parser = self._get_reasoning_parser(
- reasoning_parser_name=reasoning_parser
+ # Set up the unified parser - either a unified parser or fall back to
+ # separate parsers accessed through the parser interface
+ self.parser = ParserManager.get_parser(
+ tool_parser_name=tool_parser,
+ reasoning_parser_name=reasoning_parser,
+ enable_auto_tools=enable_auto_tools,
+ model_name=self.model_config.model,
)
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage
@@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing):
self.tool_call_id_type = "random"
self.enable_auto_tools = enable_auto_tools
- # set up tool use
- self.tool_parser = self._get_tool_parser(
- tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
- )
# HACK(woosuk): This is a hack. We should use a better store.
# FIXME: If enable_store=True, this may cause a memory leak since we
# never remove responses from the store.
@@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing):
context = ParsableContext(
response_messages=messages,
tokenizer=tokenizer,
- reasoning_parser_cls=self.reasoning_parser,
+ reasoning_parser_cls=self.parser.reasoning_parser_cls
+ if self.parser
+ else None,
request=request,
- tool_parser_cls=self.tool_parser,
+ tool_parser_cls=self.parser.tool_parser_cls
+ if self.parser
+ else None,
available_tools=available_tools,
chat_template=self.chat_template,
chat_template_content_format=self.chat_template_content_format,
@@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing):
else:
context = SimpleContext()
- if self.reasoning_parser is not None:
- reasoning_parser = self.reasoning_parser(tokenizer)
+ if self.parser and self.parser.reasoning_parser_cls is not None:
+ reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
if (
isinstance(
struct_out := sampling_params.structured_outputs,
@@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing):
default_template_content_format=self.chat_template_content_format,
default_template_kwargs=None,
tool_dicts=tool_dicts,
- tool_parser=self.tool_parser,
+ tool_parser=self.parser.tool_parser_cls if self.parser else None,
)
return messages, engine_prompts
@@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing):
final_output: CompletionOutput,
tokenizer: TokenizerLike,
) -> list[ResponseOutputItem]:
- if self.reasoning_parser:
+ if self.parser and self.parser.reasoning_parser_cls:
try:
- reasoning_parser = self.reasoning_parser(tokenizer)
+ reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
except RuntimeError as e:
logger.exception("Error in reasoning parser creation.")
raise e
@@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing):
tokenizer=tokenizer,
content=content,
enable_auto_tools=self.enable_auto_tools,
- tool_parser_cls=self.tool_parser,
+ tool_parser_cls=self.parser.tool_parser_cls if self.parser else None,
)
if content or (self.use_harmony and tool_calls):
@@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing):
current_output_index = 0
current_item_id = ""
reasoning_parser = None
- if self.reasoning_parser:
- reasoning_parser = self.reasoning_parser(tokenizer)
+ if self.parser and self.parser.reasoning_parser_cls:
+ reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
previous_text = ""
previous_token_ids: list[int] = []
first_delta_sent = False
diff --git a/vllm/parser/__init__.py b/vllm/parser/__init__.py
new file mode 100644
index 000000000..8bce3e912
--- /dev/null
+++ b/vllm/parser/__init__.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.parser.abstract_parser import (
+ DelegatingParser,
+ Parser,
+ _WrappedParser,
+)
+from vllm.parser.parser_manager import ParserManager
+
+__all__ = [
+ "Parser",
+ "DelegatingParser",
+ "ParserManager",
+ "_WrappedParser",
+]
+
+# Built-in unified parsers, registered lazily so that importing `vllm.parser`
+# does not import every parser implementation up front.
+# Maps registry name -> (module file name under vllm/parser, class name).
+_PARSERS_TO_REGISTER = {
+    "minimax_m2": (  # name
+        "minimax_m2_parser",  # filename
+        "MiniMaxM2Parser",  # class_name
+    ),
+}
+
+
+def register_lazy_parsers():
+    """
+    Record every entry of `_PARSERS_TO_REGISTER` in `ParserManager`.
+
+    Only the (module path, class name) mapping is stored; the parser module
+    itself is imported later, on first lookup. The table above is the single
+    source of truth for built-in parsers, so no parser is registered by a
+    separate hard-coded call.
+    """
+    for name, (file_name, class_name) in _PARSERS_TO_REGISTER.items():
+        module_path = f"vllm.parser.{file_name}"
+        ParserManager.register_lazy_module(name, module_path, class_name)
+
+
+register_lazy_parsers()
diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py
new file mode 100644
index 000000000..f5cd1430a
--- /dev/null
+++ b/vllm/parser/abstract_parser.py
@@ -0,0 +1,341 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import abstractmethod
+from collections.abc import Sequence
+from functools import cached_property
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+ ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+ DeltaMessage,
+ ExtractedToolCallInformation,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+ ResponsesRequest,
+)
+from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import ToolParser
+
+
+class Parser:
+    """
+    Single entry point for parsing model output: reasoning plus tool calls.
+
+    The interface merges what ReasoningParser and ToolParser expose
+    separately, so a caller can pull both the reasoning text and the tool
+    calls out of a model response through one object.
+
+    A subclass has two ways to supply behavior:
+        1. Implement the parsing methods below itself.
+        2. Assign existing implementations to the `reasoning_parser` and
+           `tool_parser` properties and forward calls to them.
+
+    NOTE: the parsing methods are decorated with `@abstractmethod`, but this
+    class does not derive from `abc.ABC`, so a subclass that leaves one of
+    them unimplemented can still be instantiated; calling the inherited stub
+    then simply returns None.
+
+    Class Attributes:
+        reasoning_parser_cls: The ReasoningParser class (not an instance),
+            for code that needs to construct the parser itself.
+        tool_parser_cls: The ToolParser class (not an instance), for the
+            same purpose.
+    """
+
+    # Parser classes rather than instances, kept for compatibility with call
+    # sites that build the individual parsers themselves. Subclasses backed
+    # by specific implementations override these.
+    reasoning_parser_cls: type[ReasoningParser] | None = None
+    tool_parser_cls: type[ToolParser] | None = None
+
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        """
+        Store the tokenizer and start with no delegate parsers attached.
+
+        Args:
+            tokenizer: The model's tokenizer, kept as `model_tokenizer`.
+            *args: Accepted for signature compatibility; not used here.
+            **kwargs: Accepted for signature compatibility; not used here.
+        """
+        self._reasoning_parser: ReasoningParser | None = None
+        self._tool_parser: ToolParser | None = None
+        self.model_tokenizer = tokenizer
+
+    @cached_property
+    def vocab(self) -> dict[str, int]:
+        """Token-to-ID mapping of the tokenizer, computed once per instance."""
+        tokenizer = self.model_tokenizer
+        return tokenizer.get_vocab()
+
+    @property
+    def reasoning_parser(self) -> ReasoningParser | None:
+        """The attached reasoning parser instance, or None."""
+        return self._reasoning_parser
+
+    @reasoning_parser.setter
+    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
+        self._reasoning_parser = parser
+
+    @property
+    def tool_parser(self) -> ToolParser | None:
+        """The attached tool parser instance, or None."""
+        return self._tool_parser
+
+    @tool_parser.setter
+    def tool_parser(self, parser: ToolParser | None) -> None:
+        self._tool_parser = parser
+
+    # ========== Reasoning Parser Methods ==========
+
+    @abstractmethod
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        """
+        Report whether the reasoning section has finished within `input_ids`.
+
+        Structured-output engines such as `xgrammar` use this to find out
+        whether the model output has moved past its reasoning content.
+
+        Args:
+            input_ids: Token IDs of the model output.
+
+        Returns:
+            True when the reasoning content ends inside `input_ids`.
+        """
+
+    def is_reasoning_end_streaming(
+        self, input_ids: list[int], delta_ids: list[int]
+    ) -> bool:
+        """
+        Report whether the reasoning section finished during a decode step.
+
+        This default ignores `delta_ids` and answers by calling
+        `is_reasoning_end` on the full output.
+
+        Args:
+            input_ids: Token IDs of the whole model output so far.
+            delta_ids: Token IDs produced by the current decode step.
+
+        Returns:
+            True when the reasoning content has ended.
+        """
+        return self.is_reasoning_end(input_ids)
+
+    @abstractmethod
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """
+        Return the token IDs that belong to the non-reasoning content.
+
+        That is the part of the output that follows the reasoning section.
+
+        Args:
+            input_ids: Token IDs of the model output.
+
+        Returns:
+            The token IDs of the content portion.
+        """
+
+    @abstractmethod
+    def extract_reasoning(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest | ResponsesRequest,
+    ) -> tuple[str | None, str | None]:
+        """
+        Split a finished model response into reasoning and content.
+
+        Intended for non-streaming responses, where the whole response is
+        available before anything is sent to the client.
+
+        Args:
+            model_output: The complete model-generated string.
+            request: The request that produced the output.
+
+        Returns:
+            A `(reasoning_content, response_content)` tuple.
+        """
+
+    @abstractmethod
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """
+        Classify one streamed delta as reasoning and/or content.
+
+        Args:
+            previous_text: Text of everything generated before this delta.
+            current_text: Text generated so far, this delta included.
+            delta_text: The text added by this delta.
+            previous_token_ids: Token IDs generated before this delta.
+            current_token_ids: All token IDs so far, this delta included.
+            delta_token_ids: The token IDs added by this delta.
+
+        Returns:
+            A DeltaMessage carrying reasoning and/or content, or None.
+        """
+
+    # ========== Tool Parser Methods ==========
+
+    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        """
+        Hook for tweaking a request before generation for tool calling.
+
+        Subclasses may override it, for instance to attach a structured
+        output schema. This default hands the request back untouched.
+
+        Args:
+            request: The incoming request.
+
+        Returns:
+            The request to use from here on.
+        """
+        return request
+
+    @abstractmethod
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """
+        Pull the tool calls out of a finished model response.
+
+        Intended for non-streaming responses.
+
+        Args:
+            model_output: The complete model-generated string.
+            request: The request that produced the output.
+
+        Returns:
+            An ExtractedToolCallInformation describing the tool calls.
+        """
+
+    @abstractmethod
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """
+        Pull tool-call fragments out of one streamed delta.
+
+        Args:
+            previous_text: Text of everything generated before this delta.
+            current_text: Text generated so far, this delta included.
+            delta_text: The text added by this delta.
+            previous_token_ids: Token IDs generated before this delta.
+            current_token_ids: All token IDs so far, this delta included.
+            delta_token_ids: The token IDs added by this delta.
+            request: The request being served.
+
+        Returns:
+            A DeltaMessage with its tool_calls field set, or None.
+        """
+
+
+class DelegatingParser(Parser):
+    """
+    A Parser implementation that delegates to separate ReasoningParser and
+    ToolParser instances.
+
+    This is the recommended base class for creating model-specific parsers
+    that combine existing reasoning and tool parser implementations.
+    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
+    in their `__init__` method.
+
+    Every method of the Parser interface is forwarded. When the relevant
+    delegate is None the method returns a neutral default instead: no
+    reasoning is extracted, reasoning counts as already finished, no tool
+    calls are reported, and requests are passed through unchanged.
+    """
+
+    # ========== Reasoning Parser Methods ==========
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        """
+        Forward to the reasoning parser.
+
+        Returns:
+            The delegate's answer, or True when there is no reasoning parser
+            (with nothing to parse as reasoning, all output is content).
+        """
+        if self._reasoning_parser is None:
+            return True
+        return self._reasoning_parser.is_reasoning_end(input_ids)
+
+    def is_reasoning_end_streaming(
+        self, input_ids: list[int], delta_ids: list[int]
+    ) -> bool:
+        """
+        Forward to the reasoning parser so that its own streaming logic,
+        which may inspect `delta_ids`, is used.
+
+        Returns:
+            The delegate's answer, or True when there is no reasoning parser.
+        """
+        if self._reasoning_parser is None:
+            return True
+        return self._reasoning_parser.is_reasoning_end_streaming(input_ids, delta_ids)
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """
+        Forward to the reasoning parser.
+
+        Returns:
+            The delegate's content token IDs, or `input_ids` itself when
+            there is no reasoning parser.
+        """
+        if self._reasoning_parser is None:
+            return input_ids
+        return self._reasoning_parser.extract_content_ids(input_ids)
+
+    def extract_reasoning(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest | ResponsesRequest,
+    ) -> tuple[str | None, str | None]:
+        """
+        Forward to the reasoning parser.
+
+        Returns:
+            The delegate's `(reasoning, content)` tuple, or
+            `(None, model_output)` when there is no reasoning parser.
+        """
+        if self._reasoning_parser is None:
+            return None, model_output
+        return self._reasoning_parser.extract_reasoning(model_output, request)
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """
+        Forward to the reasoning parser.
+
+        Returns:
+            The delegate's DeltaMessage (or None), or a DeltaMessage whose
+            content is `delta_text` when there is no reasoning parser.
+        """
+        if self._reasoning_parser is None:
+            return DeltaMessage(content=delta_text)
+        return self._reasoning_parser.extract_reasoning_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+        )
+
+    # ========== Tool Parser Methods ==========
+
+    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        """
+        Forward to the tool parser so that its request adjustments are not
+        lost when it is used through this wrapper.
+
+        Returns:
+            The delegate's adjusted request, or `request` unchanged when
+            there is no tool parser.
+        """
+        if self._tool_parser is None:
+            return request
+        return self._tool_parser.adjust_request(request)
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """
+        Forward to the tool parser.
+
+        Returns:
+            The delegate's result, or a result with no tool calls and
+            `model_output` as content when there is no tool parser.
+        """
+        if self._tool_parser is None:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+        return self._tool_parser.extract_tool_calls(model_output, request)
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """
+        Forward to the tool parser.
+
+        Returns:
+            The delegate's DeltaMessage (or None), or None when there is no
+            tool parser.
+        """
+        if self._tool_parser is None:
+            return None
+        return self._tool_parser.extract_tool_calls_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+            request,
+        )
+
+
+class _WrappedParser(DelegatingParser):
+    """
+    DelegatingParser whose delegates are built from class-level attributes.
+
+    It lets an arbitrary ReasoningParser class and ToolParser class be
+    combined behind the Parser interface. `reasoning_parser_cls` and
+    `tool_parser_cls` are read from the class when an instance is created,
+    so they have to be assigned beforehand; an attribute left as None means
+    the corresponding delegate is not created.
+
+    Usage:
+        _WrappedParser.reasoning_parser_cls = MyReasoningParser
+        _WrappedParser.tool_parser_cls = MyToolParser
+        parser = _WrappedParser(tokenizer)
+    """
+
+    reasoning_parser_cls: type[ReasoningParser] | None = None
+    tool_parser_cls: type[ToolParser] | None = None
+
+    def __init__(self, tokenizer: TokenizerLike):
+        super().__init__(tokenizer)
+        # Look the classes up on the concrete class of this instance, and
+        # build each delegate with the same tokenizer.
+        owner = self.__class__
+        if owner.reasoning_parser_cls is not None:
+            make_reasoning_parser = owner.reasoning_parser_cls
+            self._reasoning_parser = make_reasoning_parser(tokenizer)
+        if owner.tool_parser_cls is not None:
+            make_tool_parser = owner.tool_parser_cls
+            self._tool_parser = make_tool_parser(tokenizer)
diff --git a/vllm/parser/minimax_m2_parser.py b/vllm/parser/minimax_m2_parser.py
new file mode 100644
index 000000000..ee092d4f5
--- /dev/null
+++ b/vllm/parser/minimax_m2_parser.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+MiniMax M2 Parser - A unified parser for MiniMax M2 models.
+
+This parser combines the existing MiniMaxM2ReasoningParser and
+MinimaxM2ToolParser into a single unified interface by delegating
+to those implementations.
+"""
+
+from vllm.logger import init_logger
+from vllm.parser.abstract_parser import DelegatingParser
+from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser
+
+logger = init_logger(__name__)
+
+
+class MiniMaxM2Parser(DelegatingParser):
+    """
+    Unified parser for MiniMax M2 models, covering reasoning extraction and
+    tool call parsing through one object.
+
+    All parsing work is forwarded to the existing implementations:
+    - MiniMaxM2ReasoningParser extracts the reasoning
+    - MinimaxM2ToolParser parses the tool calls
+
+    Two model behaviors are handled by those delegates:
+    1. Reasoning: the model emits only the end-of-reasoning token and never
+       the start token, so everything before the end token is reasoning and
+       everything after it is the actual response.
+    2. Tool calls: the model wraps tool calls in its own tag-based syntax.
+    """
+
+    # Parser classes, exposed for callers that build the parsers themselves
+    reasoning_parser_cls = MiniMaxM2ReasoningParser
+    tool_parser_cls = MinimaxM2ToolParser
+
+    def __init__(self, tokenizer: TokenizerLike):
+        super().__init__(tokenizer)
+
+        # Both delegates share the tokenizer passed to this parser.
+        reasoning_delegate = MiniMaxM2ReasoningParser(tokenizer)
+        self._reasoning_parser = reasoning_delegate
+        tool_delegate = MinimaxM2ToolParser(tokenizer)
+        self._tool_parser = tool_delegate
+
+        parser_name = self.__class__.__name__
+        logger.debug("vLLM Successfully initialized parser %s!", parser_name)
diff --git a/vllm/parser/parser_manager.py b/vllm/parser/parser_manager.py
new file mode 100644
index 000000000..4331eba98
--- /dev/null
+++ b/vllm/parser/parser_manager.py
@@ -0,0 +1,308 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import importlib
+import os
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+
+from vllm.logger import init_logger
+from vllm.utils.collection_utils import is_list_of
+from vllm.utils.import_utils import import_from_path
+
+if TYPE_CHECKING:
+ from vllm.parser.abstract_parser import Parser
+ from vllm.reasoning import ReasoningParser
+ from vllm.tool_parsers import ToolParser
+
+logger = init_logger(__name__)
+
+
+class ParserManager:
+    """
+    Central registry for Parser implementations.
+
+    Supports two registration modes:
+        - Eager registration via `register_module`
+        - Lazy registration via `register_lazy_module`
+
+    It also resolves the individual tool and reasoning parser classes by
+    name (`get_tool_parser`, `get_reasoning_parser`) and combines them into
+    one Parser class (`get_parser`).
+    """
+
+    # Both registries are class-level and therefore shared process-wide.
+    parsers: dict[str, type[Parser]] = {}
+    lazy_parsers: dict[str, tuple[str, str]] = {}  # name -> (module_path, class_name)
+
+    @classmethod
+    def get_parser_internal(cls, name: str) -> type[Parser]:
+        """
+        Retrieve a registered or lazily registered Parser class.
+
+        Eagerly registered parsers take precedence over lazy ones of the
+        same name.
+
+        Args:
+            name: The registered name of the parser.
+
+        Returns:
+            The Parser class.
+
+        Raises:
+            KeyError: If no parser is found under the given name.
+        """
+        if name in cls.parsers:
+            return cls.parsers[name]
+
+        if name in cls.lazy_parsers:
+            return cls._load_lazy_parser(name)
+
+        registered = ", ".join(cls.list_registered())
+        raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}")
+
+    @classmethod
+    def _load_lazy_parser(cls, name: str) -> type[Parser]:
+        """
+        Import and register a lazily loaded parser.
+
+        The imported class is cached in `parsers`, so the import happens at
+        most once per name. Any failure is logged and re-raised.
+
+        Raises:
+            TypeError: If the resolved attribute is not a Parser subclass.
+        """
+        from vllm.parser.abstract_parser import Parser
+
+        module_path, class_name = cls.lazy_parsers[name]
+        try:
+            mod = importlib.import_module(module_path)
+            parser_cls = getattr(mod, class_name)
+            if not issubclass(parser_cls, Parser):
+                raise TypeError(
+                    f"{class_name} in {module_path} is not a Parser subclass."
+                )
+            cls.parsers[name] = parser_cls  # cache
+            return parser_cls
+        except Exception as e:
+            logger.exception(
+                "Failed to import lazy parser '%s' from %s: %s",
+                name,
+                module_path,
+                e,
+            )
+            raise
+
+    @classmethod
+    def _register_module(
+        cls,
+        module: type[Parser],
+        module_name: str | list[str] | None = None,
+        force: bool = True,
+    ) -> None:
+        """
+        Register a Parser class immediately.
+
+        Args:
+            module: The Parser subclass to register.
+            module_name: One name, several names, or None to use the class
+                name.
+            force: When False, refuse to replace an existing registration.
+
+        Raises:
+            TypeError: If `module` is not a Parser subclass or `module_name`
+                has an unsupported type.
+            KeyError: If `force` is False and a name is already registered.
+        """
+        from vllm.parser.abstract_parser import Parser
+
+        if not issubclass(module, Parser):
+            raise TypeError(
+                f"module must be subclass of Parser, but got {type(module)}"
+            )
+
+        if module_name is None:
+            module_names = [module.__name__]
+        elif isinstance(module_name, str):
+            module_names = [module_name]
+        elif is_list_of(module_name, str):
+            module_names = module_name
+        else:
+            raise TypeError("module_name must be str, list[str], or None.")
+
+        for name in module_names:
+            if not force and name in cls.parsers:
+                existed = cls.parsers[name]
+                raise KeyError(f"{name} is already registered at {existed.__module__}")
+            cls.parsers[name] = module
+
+    @classmethod
+    def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
+        """
+        Register a lazy module mapping for delayed import.
+
+        Nothing is imported or validated here; a later registration under
+        the same name replaces the earlier mapping.
+
+        Example:
+            ParserManager.register_lazy_module(
+                name="minimax_m2",
+                module_path="vllm.parser.minimax_m2_parser",
+                class_name="MiniMaxM2Parser",
+            )
+        """
+        cls.lazy_parsers[name] = (module_path, class_name)
+
+    @classmethod
+    def register_module(
+        cls,
+        name: str | list[str] | None = None,
+        force: bool = True,
+        module: type[Parser] | None = None,
+    ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
+        """
+        Register a Parser class.
+
+        Can be used as a decorator or called directly.
+
+        Usage:
+            @ParserManager.register_module("my_parser")
+            class MyParser(Parser):
+                ...
+
+        Or:
+            ParserManager.register_module(module=MyParser)
+
+        Returns:
+            `module` when it is given, otherwise a decorator that returns
+            the decorated class unchanged.
+
+        Raises:
+            TypeError: If `force` is not a bool.
+        """
+        if not isinstance(force, bool):
+            raise TypeError(f"force must be a boolean, but got {type(force)}")
+
+        # Immediate registration
+        if module is not None:
+            cls._register_module(module=module, module_name=name, force=force)
+            return module
+
+        # Decorator usage
+        # NOTE: this path only records a lazy (module path, class name)
+        # mapping; `force` is not consulted and the class is not validated
+        # until it is first looked up.
+        def _decorator(obj: type[Parser]) -> type[Parser]:
+            module_path = obj.__module__
+            class_name = obj.__name__
+
+            if isinstance(name, str):
+                names = [name]
+            elif is_list_of(name, str):
+                names = name
+            else:
+                names = [class_name]
+
+            for n in names:
+                cls.lazy_parsers[n] = (module_path, class_name)
+
+            return obj
+
+        return _decorator
+
+    @classmethod
+    def list_registered(cls) -> list[str]:
+        """Return the sorted names of all eagerly and lazily registered parsers."""
+        return sorted(set(cls.parsers.keys()) | set(cls.lazy_parsers.keys()))
+
+    @classmethod
+    def import_parser(cls, plugin_path: str) -> None:
+        """
+        Import a user-defined parser from an arbitrary path.
+
+        The module name is the file name without its extension. This is
+        best-effort: an import failure is logged, not raised.
+        """
+        module_name = os.path.splitext(os.path.basename(plugin_path))[0]
+        try:
+            import_from_path(module_name, plugin_path)
+        except Exception:
+            logger.exception(
+                "Failed to load module '%s' from %s.", module_name, plugin_path
+            )
+
+    @classmethod
+    def get_tool_parser(
+        cls,
+        tool_parser_name: str | None = None,
+        enable_auto_tools: bool = False,
+        model_name: str | None = None,
+    ) -> type[ToolParser] | None:
+        """
+        Get the tool parser based on the name.
+
+        Args:
+            tool_parser_name: Registered name of the tool parser.
+            enable_auto_tools: Whether auto tool choice is enabled.
+            model_name: Model name, used only to emit a model-specific
+                warning.
+
+        Returns:
+            The ToolParser class, or None when auto tool choice is disabled
+            or no name is given.
+
+        Raises:
+            TypeError: If the lookup of `tool_parser_name` fails.
+        """
+        from vllm.tool_parsers import ToolParserManager
+
+        parser: type[ToolParser] | None = None
+        if not enable_auto_tools or tool_parser_name is None:
+            return parser
+        logger.info('"auto" tool choice has been enabled.')
+
+        try:
+            if (
+                tool_parser_name == "pythonic"
+                and model_name
+                and model_name.startswith("meta-llama/Llama-3.2")
+            ):
+                logger.warning(
+                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
+                )
+            parser = ToolParserManager.get_tool_parser(tool_parser_name)
+        except Exception as e:
+            raise TypeError(
+                "Error: --enable-auto-tool-choice requires "
+                f"tool_parser:'{tool_parser_name}' which has not "
+                "been registered"
+            ) from e
+        return parser
+
+    @classmethod
+    def get_reasoning_parser(
+        cls,
+        reasoning_parser_name: str | None,
+    ) -> type[ReasoningParser] | None:
+        """
+        Get the reasoning parser based on the name.
+
+        Returns:
+            The ReasoningParser class, or None when no name is given.
+
+        Raises:
+            TypeError: If the lookup fails or yields None.
+        """
+        from vllm.reasoning import ReasoningParserManager
+
+        parser: type[ReasoningParser] | None = None
+        if not reasoning_parser_name:
+            return None
+        try:
+            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
+            assert parser is not None
+        except Exception as e:
+            raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
+        return parser
+
+    @classmethod
+    def get_parser(
+        cls,
+        tool_parser_name: str | None = None,
+        reasoning_parser_name: str | None = None,
+        enable_auto_tools: bool = False,
+        model_name: str | None = None,
+    ) -> type[Parser] | None:
+        """
+        Get a unified Parser that handles both reasoning and tool parsing.
+
+        Resolution order:
+            1. Both names are given and equal: use the unified parser
+               registered under that name, if there is one.
+            2. Exactly one name is given: use the unified parser registered
+               under that name, if there is one.
+            3. Otherwise: resolve the reasoning and tool parsers separately
+               and return a fresh `_WrappedParser` subclass bound to them.
+
+        When two different names are given, a unified parser is never
+        chosen, because it would replace the other, explicitly requested
+        parser. Step 3 creates a new class per call, so the results of
+        separate calls do not share parser classes.
+
+        Args:
+            tool_parser_name: The name of the tool parser.
+            reasoning_parser_name: The name of the reasoning parser.
+            enable_auto_tools: Whether auto tool choice is enabled.
+            model_name: The model name for parser-specific warnings.
+
+        Returns:
+            A Parser class, or None if neither parser is specified.
+
+        Raises:
+            TypeError: If an individual parser name is not registered.
+        """
+        from vllm.parser.abstract_parser import _WrappedParser
+
+        if not tool_parser_name and not reasoning_parser_name:
+            return None
+
+        # NOTE(review): a unified parser found in strategy 1 or 2 is
+        # returned without consulting `enable_auto_tools` - confirm callers
+        # gate tool parsing themselves.
+
+        # Strategy 1: If both names match, check for a unified parser with that name
+        if tool_parser_name and tool_parser_name == reasoning_parser_name:
+            try:
+                parser = cls.get_parser_internal(tool_parser_name)
+                logger.info(
+                    "Using unified parser '%s' for both reasoning and tool parsing.",
+                    tool_parser_name,
+                )
+                return parser
+            except KeyError:
+                pass  # No unified parser with this name
+
+        # Strategy 2: Only one name was supplied, so a unified parser
+        # registered under it cannot conflict with another requested parser
+        if not (tool_parser_name and reasoning_parser_name):
+            name = tool_parser_name or reasoning_parser_name
+            try:
+                parser = cls.get_parser_internal(name)
+                logger.info(
+                    "Using unified parser '%s' for reasoning and tool parsing.",
+                    name,
+                )
+                return parser
+            except KeyError:
+                pass  # No unified parser with this name
+
+        # Strategy 3: Create a DelegatingParser with the individual parser classes
+        reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
+        tool_parser_cls = cls.get_tool_parser(
+            tool_parser_name, enable_auto_tools, model_name
+        )
+
+        if reasoning_parser_cls is None and tool_parser_cls is None:
+            return None
+
+        # Bind the parser classes to a subclass created for this call.
+        # Assigning them on the shared `_WrappedParser` would retroactively
+        # change every class returned by earlier calls.
+        class _BoundWrappedParser(_WrappedParser):
+            """`_WrappedParser` bound to one reasoning/tool parser pair."""
+
+        _BoundWrappedParser.reasoning_parser_cls = reasoning_parser_cls
+        _BoundWrappedParser.tool_parser_cls = tool_parser_cls
+
+        return _BoundWrappedParser