[1/N] Initial Implementation of Parser for ResponsesAPI (#32712)

Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: Andrew Xia <axia@fb.com>
2026-02-03 21:59:03 -05:00
parent 02080179a3
commit e1bf04b6c2
10 changed files with 772 additions and 61 deletions
--- a/tests/entrypoints/openai/test_chat_error.py
+++ b/tests/entrypoints/openai/test_chat_error.py
@@ -36,6 +36,7 @@ class MockHFConfig:
 class MockModelConfig:
    task = "generate"
    runner_type = "generate"
+    model = MODEL_NAME
    tokenizer = MODEL_NAME
    trust_remote_code = False
    tokenizer_mode = "auto"
--- a/tests/entrypoints/openai/test_completion_error.py
+++ b/tests/entrypoints/openai/test_completion_error.py
@@ -36,6 +36,7 @@ class MockHFConfig:
 class MockModelConfig:
    task = "generate"
    runner_type = "generate"
+    model = MODEL_NAME
    tokenizer = MODEL_NAME
    trust_remote_code = False
    tokenizer_mode = "auto"
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -511,6 +511,7 @@ class MockHFConfig:
 class MockModelConfig:
    task = "generate"
    runner_type = "generate"
+    model = MODEL_NAME
    tokenizer = MODEL_NAME
    trust_remote_code = False
    tokenizer_mode = "auto"
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob
 from vllm.outputs import CompletionOutput, RequestOutput
+from vllm.parser import ParserManager
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.tokenizers import TokenizerLike
 from vllm.tokenizers.mistral import (
@@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing):
        self.logits_processors = self.model_config.logits_processors

        # set up reasoning parser
-        self.reasoning_parser = self._get_reasoning_parser(
+        self.reasoning_parser = ParserManager.get_reasoning_parser(
            reasoning_parser_name=reasoning_parser
        )
        # set up tool use
        self.enable_auto_tools: bool = enable_auto_tools
-        self.tool_parser = self._get_tool_parser(
-            tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
+        self.tool_parser = ParserManager.get_tool_parser(
+            tool_parser_name=tool_parser,
+            enable_auto_tools=enable_auto_tools,
+            model_name=self.model_config.model,
        )
        self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none

--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest
 from vllm.multimodal import MultiModalDataDict
 from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
 from vllm.pooling_params import PoolingParams
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers import ToolParser, ToolParserManager
+from vllm.tool_parsers import ToolParser
 from vllm.tracing import (
    contains_trace_headers,
    extract_trace_headers,
@@ -246,46 +245,6 @@ class OpenAIServing:
        self.model_config = self.models.model_config
        self.max_model_len = self.model_config.max_model_len

-    def _get_tool_parser(
-        self, tool_parser_name: str | None = None, enable_auto_tools: bool = False
-    ) -> Callable[[TokenizerLike], ToolParser] | None:
-        """Get the tool parser based on the name."""
-        parser = None
-        if not enable_auto_tools or tool_parser_name is None:
-            return parser
-        logger.info('"auto" tool choice has been enabled.')
-
-        try:
-            if tool_parser_name == "pythonic" and self.model_config.model.startswith(
-                "meta-llama/Llama-3.2"
-            ):
-                logger.warning(
-                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
-                )
-            parser = ToolParserManager.get_tool_parser(tool_parser_name)
-        except Exception as e:
-            raise TypeError(
-                "Error: --enable-auto-tool-choice requires "
-                f"tool_parser:'{tool_parser_name}' which has not "
-                "been registered"
-            ) from e
-        return parser
-
-    def _get_reasoning_parser(
-        self,
-        reasoning_parser_name: str,
-    ) -> Callable[[TokenizerLike], ReasoningParser] | None:
-        """Get the reasoning parser based on the name."""
-        parser = None
-        if not reasoning_parser_name:
-            return None
-        try:
-            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
-            assert parser is not None
-        except Exception as e:
-            raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
-        return parser
-
    async def beam_search(
        self,
        prompt: PromptType,
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -123,6 +123,7 @@ from vllm.logger import init_logger
 from vllm.logprobs import Logprob as SampleLogprob
 from vllm.logprobs import SampleLogprobs
 from vllm.outputs import CompletionOutput
+from vllm.parser import ParserManager
 from vllm.sampling_params import SamplingParams, StructuredOutputsParams
 from vllm.tokenizers import TokenizerLike
 from vllm.utils import random_uuid
@@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing):
        self.chat_template_content_format: Final = chat_template_content_format
        self.enable_log_outputs = enable_log_outputs

-        self.reasoning_parser = self._get_reasoning_parser(
-            reasoning_parser_name=reasoning_parser
+        # Set up the unified parser - either a unified parser or fall back to
+        # separate parsers accessed through the parser interface
+        self.parser = ParserManager.get_parser(
+            tool_parser_name=tool_parser,
+            reasoning_parser_name=reasoning_parser,
+            enable_auto_tools=enable_auto_tools,
+            model_name=self.model_config.model,
        )
        self.enable_prompt_tokens_details = enable_prompt_tokens_details
        self.enable_force_include_usage = enable_force_include_usage
@@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing):
            self.tool_call_id_type = "random"

        self.enable_auto_tools = enable_auto_tools
-        # set up tool use
-        self.tool_parser = self._get_tool_parser(
-            tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
-        )
        # HACK(woosuk): This is a hack. We should use a better store.
        # FIXME: If enable_store=True, this may cause a memory leak since we
        # never remove responses from the store.
@@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing):
                        context = ParsableContext(
                            response_messages=messages,
                            tokenizer=tokenizer,
-                            reasoning_parser_cls=self.reasoning_parser,
+                            reasoning_parser_cls=self.parser.reasoning_parser_cls
+                            if self.parser
+                            else None,
                            request=request,
-                            tool_parser_cls=self.tool_parser,
+                            tool_parser_cls=self.parser.tool_parser_cls
+                            if self.parser
+                            else None,
                            available_tools=available_tools,
                            chat_template=self.chat_template,
                            chat_template_content_format=self.chat_template_content_format,
@@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing):
                    else:
                        context = SimpleContext()

-                if self.reasoning_parser is not None:
-                    reasoning_parser = self.reasoning_parser(tokenizer)
+                if self.parser and self.parser.reasoning_parser_cls is not None:
+                    reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
                    if (
                        isinstance(
                            struct_out := sampling_params.structured_outputs,
@@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing):
            default_template_content_format=self.chat_template_content_format,
            default_template_kwargs=None,
            tool_dicts=tool_dicts,
-            tool_parser=self.tool_parser,
+            tool_parser=self.parser.tool_parser_cls if self.parser else None,
        )
        return messages, engine_prompts

@@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing):
        final_output: CompletionOutput,
        tokenizer: TokenizerLike,
    ) -> list[ResponseOutputItem]:
-        if self.reasoning_parser:
+        if self.parser and self.parser.reasoning_parser_cls:
            try:
-                reasoning_parser = self.reasoning_parser(tokenizer)
+                reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
            except RuntimeError as e:
                logger.exception("Error in reasoning parser creation.")
                raise e
@@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing):
            tokenizer=tokenizer,
            content=content,
            enable_auto_tools=self.enable_auto_tools,
-            tool_parser_cls=self.tool_parser,
+            tool_parser_cls=self.parser.tool_parser_cls if self.parser else None,
        )

        if content or (self.use_harmony and tool_calls):
@@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing):
        current_output_index = 0
        current_item_id = ""
        reasoning_parser = None
-        if self.reasoning_parser:
-            reasoning_parser = self.reasoning_parser(tokenizer)
+        if self.parser and self.parser.reasoning_parser_cls:
+            reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
        previous_text = ""
        previous_token_ids: list[int] = []
        first_delta_sent = False
--- a/vllm/parser/init.py
+++ b/vllm/parser/init.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.parser.abstract_parser import (
+    DelegatingParser,
+    Parser,
+    _WrappedParser,
+)
+from vllm.parser.parser_manager import ParserManager
+
+__all__ = [
+    "Parser",
+    "DelegatingParser",
+    "ParserManager",
+    "_WrappedParser",
+]
+
+# Built-in unified parsers. Each entry maps the registered parser name to
+# (module file name under ``vllm.parser``, class name). This table is the
+# single source of truth: adding a parser only requires a new entry here.
+_PARSERS_TO_REGISTER = {
+    "minimax_m2": (  # name
+        "minimax_m2_parser",  # filename
+        "MiniMaxM2Parser",  # class_name
+    ),
+}
+
+
+def register_lazy_parsers() -> None:
+    """Register every entry of ``_PARSERS_TO_REGISTER`` as a lazy parser.
+
+    Only the (module path, class name) mapping is recorded through
+    ``ParserManager.register_lazy_module``; nothing is imported here.
+    Calling this more than once is harmless because each call stores the
+    same mapping under the same name.
+    """
+    for name, (file_name, class_name) in _PARSERS_TO_REGISTER.items():
+        ParserManager.register_lazy_module(
+            name=name,
+            module_path=f"vllm.parser.{file_name}",
+            class_name=class_name,
+        )
+
+
+# Register lazy parsers
+register_lazy_parsers()
--- a/vllm/parser/abstract_parser.py
+++ b/vllm/parser/abstract_parser.py
@@ -0,0 +1,341 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import abstractmethod
+from collections.abc import Sequence
+from functools import cached_property
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaMessage,
+    ExtractedToolCallInformation,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponsesRequest,
+)
+from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import ToolParser
+
+
+class Parser:
+    """
+    Single interface that merges the roles of ReasoningParser and ToolParser
+    for parsing model output.
+
+    Reasoning extraction covers content such as chain-of-thought text inside
+    <think> tags; tool call extraction covers function calls emitted in
+    XML/JSON format.
+
+    A subclass has two options:
+    1. Provide its own implementation of the methods marked abstract
+    2. Assign the `reasoning_parser` and `tool_parser` properties and forward
+       to those existing parser implementations
+
+    Note: this class does not derive from `abc.ABC`, so the `@abstractmethod`
+    markers are not enforced when instantiating; a method that is left
+    un-overridden only contains a docstring and therefore returns None.
+
+    Class Attributes:
+        reasoning_parser_cls: ReasoningParser class (not an instance), kept
+            for code that needs to construct the parser itself.
+        tool_parser_cls: ToolParser class (not an instance), kept for code
+            that needs to construct the parser itself.
+    """
+
+    # Parser classes exposed at class level; subclasses that rely on specific
+    # parser classes are expected to override these.
+    reasoning_parser_cls: type[ReasoningParser] | None = None
+    tool_parser_cls: type[ToolParser] | None = None
+
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        """
+        Store the tokenizer and start without any underlying parsers.
+
+        Args:
+            tokenizer: The model's tokenizer, kept as `model_tokenizer` for
+                token-based parsing operations.
+            *args: Accepted but unused by this base class.
+            **kwargs: Accepted but unused by this base class.
+        """
+        self._tool_parser: ToolParser | None = None
+        self._reasoning_parser: ReasoningParser | None = None
+        self.model_tokenizer = tokenizer
+
+    @cached_property
+    def vocab(self) -> dict[str, int]:
+        """Token-to-ID vocabulary of the tokenizer (computed once, then cached)."""
+        token_to_id = self.model_tokenizer.get_vocab()
+        return token_to_id
+
+    @property
+    def reasoning_parser(self) -> ReasoningParser | None:
+        """Reasoning parser instance in use, or None when there is none."""
+        return self._reasoning_parser
+
+    @reasoning_parser.setter
+    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
+        self._reasoning_parser = parser
+
+    @property
+    def tool_parser(self) -> ToolParser | None:
+        """Tool parser instance in use, or None when there is none."""
+        return self._tool_parser
+
+    @tool_parser.setter
+    def tool_parser(self, parser: ToolParser | None) -> None:
+        self._tool_parser = parser
+
+    # ========== Reasoning Parser Methods ==========
+
+    @abstractmethod
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        """
+        Tell whether the reasoning section has finished within input_ids.
+
+        Structured engines such as `xgrammar` call this to find out whether
+        the model output has moved past its reasoning content.
+
+        Args:
+            input_ids: Token IDs of the model output.
+
+        Returns:
+            True when the reasoning content ends inside input_ids.
+        """
+
+    def is_reasoning_end_streaming(
+        self, input_ids: list[int], delta_ids: list[int]
+    ) -> bool:
+        """
+        Tell whether the reasoning section finishes during a decode step.
+
+        The base implementation ignores delta_ids and answers with
+        `is_reasoning_end(input_ids)`.
+
+        Args:
+            input_ids: Token IDs of the whole model output so far.
+            delta_ids: The tokens computed in the current decode step.
+
+        Returns:
+            True when the reasoning content ends in the delta_ids.
+        """
+        reasoning_finished = self.is_reasoning_end(input_ids)
+        return reasoning_finished
+
+    @abstractmethod
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """
+        Pull the content token IDs out of input_ids.
+
+        "Content" is the non-reasoning part of the output (for example,
+        whatever follows the </think> tag).
+
+        Args:
+            input_ids: Token IDs of the model output.
+
+        Returns:
+            Token IDs belonging to the content.
+        """
+
+    @abstractmethod
+    def extract_reasoning(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest | ResponsesRequest,
+    ) -> tuple[str | None, str | None]:
+        """
+        Split a complete model-generated string into reasoning and content.
+
+        Intended for non-streaming responses, where the full model response
+        exists before anything is sent to the client.
+
+        Args:
+            model_output: The full string generated by the model.
+            request: The request that produced the output.
+
+        Returns:
+            A (reasoning_content, response_content) tuple.
+        """
+
+    @abstractmethod
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """
+        Split one streaming delta into reasoning and content.
+
+        Args:
+            previous_text: Text of all tokens before this delta.
+            current_text: Text with this delta included.
+            delta_text: Text added by this delta.
+            previous_token_ids: Token IDs generated before this delta.
+            current_token_ids: Token IDs with this delta included.
+            delta_token_ids: Token IDs added by this delta.
+
+        Returns:
+            A DeltaMessage carrying reasoning and/or content fields, or None.
+        """
+
+    # ========== Tool Parser Methods ==========
+
+    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        """
+        Hook for tweaking request parameters for tool calling.
+
+        The base implementation hands the request back untouched. Subclasses
+        may override it to modify request parameters (e.g., to set structured
+        output schemas for tool calling).
+
+        Args:
+            request: The incoming request.
+
+        Returns:
+            The (possibly adjusted) request.
+        """
+        return request
+
+    @abstractmethod
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """
+        Find the tool calls inside a complete model-generated string.
+
+        Intended for non-streaming responses.
+
+        Args:
+            model_output: The full string generated by the model.
+            request: The request that produced the output.
+
+        Returns:
+            ExtractedToolCallInformation holding the tool calls.
+        """
+
+    @abstractmethod
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """
+        Find tool calls inside one streaming delta.
+
+        Args:
+            previous_text: Text of all tokens before this delta.
+            current_text: Text with this delta included.
+            delta_text: Text added by this delta.
+            previous_token_ids: Token IDs generated before this delta.
+            current_token_ids: Token IDs with this delta included.
+            delta_token_ids: Token IDs added by this delta.
+            request: The request object.
+
+        Returns:
+            A DeltaMessage with its tool_calls field set, or None.
+        """
+
+
+class DelegatingParser(Parser):
+    """
+    A Parser implementation that delegates to separate ReasoningParser and
+    ToolParser instances.
+
+    This is the recommended base class for creating model-specific parsers
+    that combine existing reasoning and tool parser implementations.
+    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
+    in their `__init__` method.
+
+    If either parser is None, the corresponding methods will return default
+    values (no reasoning extraction, no tool calls): the whole output is
+    treated as content and requests are passed through unchanged.
+
+    Every method of the Parser interface is forwarded, including
+    `is_reasoning_end`, `is_reasoning_end_streaming`, `extract_content_ids`
+    and `adjust_request`; without these overrides the inherited
+    docstring-only stubs would return None instead of consulting the
+    underlying parsers.
+    """
+
+    # ========== Reasoning Parser Methods ==========
+
+    # NOTE(review): the four reasoning hooks below assume ReasoningParser
+    # exposes same-named methods with matching signatures (the Parser
+    # interface mirrors it) - confirm against ReasoningParser.
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        """Forward to the reasoning parser; True when there is none.
+
+        With no reasoning parser the output has no reasoning section, so
+        it is reported as already ended.
+        """
+        if self._reasoning_parser is None:
+            return True
+        return self._reasoning_parser.is_reasoning_end(input_ids)
+
+    def is_reasoning_end_streaming(
+        self, input_ids: list[int], delta_ids: list[int]
+    ) -> bool:
+        """Forward to the reasoning parser; True when there is none."""
+        if self._reasoning_parser is None:
+            return True
+        return self._reasoning_parser.is_reasoning_end_streaming(
+            input_ids, delta_ids
+        )
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """Forward to the reasoning parser; all IDs are content if none."""
+        if self._reasoning_parser is None:
+            return input_ids
+        return self._reasoning_parser.extract_content_ids(input_ids)
+
+    def extract_reasoning(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest | ResponsesRequest,
+    ) -> tuple[str | None, str | None]:
+        """Forward to the reasoning parser.
+
+        Returns `(None, model_output)` when no reasoning parser is set,
+        i.e. the whole output is response content.
+        """
+        if self._reasoning_parser is None:
+            return None, model_output
+        return self._reasoning_parser.extract_reasoning(model_output, request)
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """Forward to the reasoning parser.
+
+        Returns the delta as plain content when no reasoning parser is set.
+        """
+        if self._reasoning_parser is None:
+            return DeltaMessage(content=delta_text)
+        return self._reasoning_parser.extract_reasoning_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+        )
+
+    # ========== Tool Parser Methods ==========
+
+    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        """Forward to the tool parser; return the request as-is if none.
+
+        NOTE(review): assumes ToolParser exposes `adjust_request(request)`
+        - confirm against ToolParser.
+        """
+        if self._tool_parser is None:
+            return request
+        return self._tool_parser.adjust_request(request)
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """Forward to the tool parser.
+
+        Returns a "no tools called" result carrying the full output as
+        content when no tool parser is set.
+        """
+        if self._tool_parser is None:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+        return self._tool_parser.extract_tool_calls(model_output, request)
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """Forward to the tool parser; None when no tool parser is set."""
+        if self._tool_parser is None:
+            return None
+        return self._tool_parser.extract_tool_calls_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+            request,
+        )
+
+
+class _WrappedParser(DelegatingParser):
+    """
+    DelegatingParser whose underlying parsers are built from class attributes.
+
+    It exists so a parser can be assembled dynamically around individual
+    ReasoningParser and ToolParser classes. `reasoning_parser_cls` and
+    `tool_parser_cls` must be assigned on the class before an instance is
+    created; an attribute left as None means no parser of that kind is built.
+
+    Usage:
+        _WrappedParser.reasoning_parser_cls = MyReasoningParser
+        _WrappedParser.tool_parser_cls = MyToolParser
+        parser = _WrappedParser(tokenizer)
+    """
+
+    reasoning_parser_cls: type[ReasoningParser] | None = None
+    tool_parser_cls: type[ToolParser] | None = None
+
+    def __init__(self, tokenizer: TokenizerLike):
+        super().__init__(tokenizer)
+        # Read the parser classes from the concrete class, not the instance
+        wrapper_cls = type(self)
+
+        reasoning_cls = wrapper_cls.reasoning_parser_cls
+        if reasoning_cls is not None:
+            self._reasoning_parser = reasoning_cls(tokenizer)
+
+        tool_cls = wrapper_cls.tool_parser_cls
+        if tool_cls is not None:
+            self._tool_parser = tool_cls(tokenizer)
--- a/vllm/parser/minimax_m2_parser.py
+++ b/vllm/parser/minimax_m2_parser.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+MiniMax M2 Parser - A unified parser for MiniMax M2 models.
+
+This parser combines the existing MiniMaxM2ReasoningParser and
+MinimaxM2ToolParser into a single unified interface by delegating
+to those implementations.
+"""
+
+from vllm.logger import init_logger
+from vllm.parser.abstract_parser import DelegatingParser
+from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser
+
+logger = init_logger(__name__)
+
+
+class MiniMaxM2Parser(DelegatingParser):
+    """
+    One parser for MiniMax M2 models covering reasoning extraction as well
+    as tool call parsing.
+
+    All work is forwarded to the existing implementations:
+    - MiniMaxM2ReasoningParser handles reasoning extraction
+    - MinimaxM2ToolParser handles tool call parsing
+
+    Two behaviors are specific to MiniMax M2 models:
+    1. Reasoning: no <think> start token is generated, only the </think> end
+       token. Everything before </think> is reasoning, everything after it
+       is the actual response.
+    2. Tool Calls: <minimax:tool_call>...</minimax:tool_call> tags are used,
+       with <invoke name="...">...</invoke> and
+       <parameter name="...">...</parameter> syntax inside.
+    """
+
+    # Parser classes exposed at class level for code that needs the class
+    # rather than an instance
+    reasoning_parser_cls = MiniMaxM2ReasoningParser
+    tool_parser_cls = MinimaxM2ToolParser
+
+    def __init__(self, tokenizer: TokenizerLike):
+        super().__init__(tokenizer)
+
+        # Build both delegates from the same tokenizer
+        reasoning_delegate = MiniMaxM2ReasoningParser(tokenizer)
+        self._reasoning_parser = reasoning_delegate
+        tool_delegate = MinimaxM2ToolParser(tokenizer)
+        self._tool_parser = tool_delegate
+
+        parser_name = type(self).__name__
+        logger.debug("vLLM Successfully initialized parser %s!", parser_name)
--- a/vllm/parser/parser_manager.py
+++ b/vllm/parser/parser_manager.py
@@ -0,0 +1,308 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import importlib
+import os
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+
+from vllm.logger import init_logger
+from vllm.utils.collection_utils import is_list_of
+from vllm.utils.import_utils import import_from_path
+
+if TYPE_CHECKING:
+    from vllm.parser.abstract_parser import Parser
+    from vllm.reasoning import ReasoningParser
+    from vllm.tool_parsers import ToolParser
+
+logger = init_logger(__name__)
+
+
+class ParserManager:
+    """
+    Central registry for Parser implementations.
+
+    Supports two registration modes:
+      - Eager registration via `register_module`
+      - Lazy registration via `register_lazy_module`
+    """
+
+    parsers: dict[str, type[Parser]] = {}
+    lazy_parsers: dict[str, tuple[str, str]] = {}  # name -> (module_path, class_name)
+
+    @classmethod
+    def get_parser_internal(cls, name: str) -> type[Parser]:
+        """
+        Look up a Parser class by its registered name.
+
+        Eagerly registered (or previously loaded) parsers are checked first;
+        otherwise a lazily registered parser is imported on demand.
+
+        Args:
+            name: Name the parser was registered under.
+
+        Returns:
+            The matching Parser class.
+
+        Raises:
+            KeyError: If the name is registered neither eagerly nor lazily.
+        """
+        eager = cls.parsers
+        if name in eager:
+            return eager[name]
+
+        if name not in cls.lazy_parsers:
+            known = ", ".join(cls.list_registered())
+            raise KeyError(f"Parser '{name}' not found. Available parsers: {known}")
+
+        return cls._load_lazy_parser(name)
+
+    @classmethod
+    def _load_lazy_parser(cls, name: str) -> type[Parser]:
+        """Import a lazily registered parser and cache it in `cls.parsers`.
+
+        Any failure (import error, missing attribute, or a class that is not
+        a Parser subclass) is logged and then re-raised unchanged.
+        """
+        # Imported here rather than at module level (module-level import is
+        # only done under TYPE_CHECKING).
+        from vllm.parser.abstract_parser import Parser
+
+        module_path, class_name = cls.lazy_parsers[name]
+        try:
+            loaded_cls = getattr(importlib.import_module(module_path), class_name)
+            if issubclass(loaded_cls, Parser):
+                cls.parsers[name] = loaded_cls  # cache
+                return loaded_cls
+            raise TypeError(
+                f"{class_name} in {module_path} is not a Parser subclass."
+            )
+        except Exception as e:
+            logger.exception(
+                "Failed to import lazy parser '%s' from %s: %s",
+                name,
+                module_path,
+                e,
+            )
+            raise
+
+    @classmethod
+    def _register_module(
+        cls,
+        module: type[Parser],
+        module_name: str | list[str] | None = None,
+        force: bool = True,
+    ) -> None:
+        """Store a Parser class in `cls.parsers` right away.
+
+        Args:
+            module: The Parser subclass to register.
+            module_name: One name, a list of names, or None to use the
+                class's own `__name__`.
+            force: When False, registering a name that already exists
+                raises KeyError instead of overwriting it.
+
+        Raises:
+            TypeError: If `module` is not a Parser subclass, or
+                `module_name` is not str, list[str], or None.
+            KeyError: If `force` is False and a name is already registered.
+        """
+        from vllm.parser.abstract_parser import Parser
+
+        if not issubclass(module, Parser):
+            raise TypeError(
+                f"module must be subclass of Parser, but got {type(module)}"
+            )
+
+        # Normalize module_name into a list of aliases
+        if module_name is None:
+            aliases = [module.__name__]
+        elif isinstance(module_name, str):
+            aliases = [module_name]
+        elif is_list_of(module_name, str):
+            aliases = module_name
+        else:
+            raise TypeError("module_name must be str, list[str], or None.")
+
+        registry = cls.parsers
+        for alias in aliases:
+            if alias in registry and not force:
+                existed = registry[alias]
+                raise KeyError(f"{alias} is already registered at {existed.__module__}")
+            registry[alias] = module
+
+    @classmethod
+    def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
+        """
+        Record where a parser lives so it can be imported later.
+
+        Only the mapping is stored; nothing is imported by this call. An
+        existing lazy entry under the same name is overwritten.
+
+        Example:
+            ParserManager.register_lazy_module(
+                name="minimax_m2",
+                module_path="vllm.parser.minimax_m2_parser",
+                class_name="MiniMaxM2Parser",
+            )
+        """
+        location = (module_path, class_name)
+        cls.lazy_parsers[name] = location
+
+    @classmethod
+    def register_module(
+        cls,
+        name: str | list[str] | None = None,
+        force: bool = True,
+        module: type[Parser] | None = None,
+    ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
+        """
+        Register a Parser class, either directly or through a decorator.
+
+        When `module` is given it is registered immediately and returned.
+        Otherwise a decorator is returned; the decorator records the class
+        in the lazy registry under the given name(s), or under the class
+        name when `name` is neither a str nor a list of str. `force` only
+        affects the immediate form.
+
+        Usage:
+            @ParserManager.register_module("my_parser")
+            class MyParser(Parser):
+                ...
+
+        Or:
+            ParserManager.register_module(module=MyParser)
+        """
+        if not isinstance(force, bool):
+            raise TypeError(f"force must be a boolean, but got {type(force)}")
+
+        # Decorator usage
+        def _decorator(obj: type[Parser]) -> type[Parser]:
+            location = (obj.__module__, obj.__name__)
+
+            if isinstance(name, str):
+                aliases = [name]
+            elif is_list_of(name, str):
+                aliases = name
+            else:
+                aliases = [obj.__name__]
+
+            cls.lazy_parsers.update({alias: location for alias in aliases})
+            return obj
+
+        if module is None:
+            return _decorator
+
+        # Immediate registration
+        cls._register_module(module=module, module_name=name, force=force)
+        return module
+
+    @classmethod
+    def list_registered(cls) -> list[str]:
+        """Return the sorted, de-duplicated names of eager and lazy parsers."""
+        all_names = {*cls.parsers, *cls.lazy_parsers}
+        return sorted(all_names)
+
+    @classmethod
+    def import_parser(cls, plugin_path: str) -> None:
+        """Load a user-defined parser module from an arbitrary file path.
+
+        The module name is the file's base name without its extension.
+        Loading is best-effort: a failure is logged and not re-raised.
+        """
+        file_name = os.path.basename(plugin_path)
+        module_name, _ext = os.path.splitext(file_name)
+        try:
+            import_from_path(module_name, plugin_path)
+        except Exception:
+            logger.exception(
+                "Failed to load module '%s' from %s.", module_name, plugin_path
+            )
+
+    @classmethod
+    def get_tool_parser(
+        cls,
+        tool_parser_name: str | None = None,
+        enable_auto_tools: bool = False,
+        model_name: str | None = None,
+    ) -> type[ToolParser] | None:
+        """Resolve the tool parser class for the given name.
+
+        Returns None unless auto tool choice is enabled and a parser name
+        is supplied. `model_name` is only used to warn about the
+        "pythonic" parser on Llama 3.2 models.
+
+        Raises:
+            TypeError: If the lookup fails for any reason.
+        """
+        from vllm.tool_parsers import ToolParserManager
+
+        # Tool parsing needs both the flag and a parser name
+        if tool_parser_name is None or not enable_auto_tools:
+            return None
+        logger.info('"auto" tool choice has been enabled.')
+
+        try:
+            is_llama32_pythonic = (
+                tool_parser_name == "pythonic"
+                and model_name
+                and model_name.startswith("meta-llama/Llama-3.2")
+            )
+            if is_llama32_pythonic:
+                logger.warning(
+                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
+                )
+            return ToolParserManager.get_tool_parser(tool_parser_name)
+        except Exception as e:
+            raise TypeError(
+                "Error: --enable-auto-tool-choice requires "
+                f"tool_parser:'{tool_parser_name}' which has not "
+                "been registered"
+            ) from e
+
+    @classmethod
+    def get_reasoning_parser(
+        cls,
+        reasoning_parser_name: str | None,
+    ) -> type[ReasoningParser] | None:
+        """Look up the reasoning parser class for ``reasoning_parser_name``.
+
+        Args:
+            reasoning_parser_name: Name of the reasoning parser to resolve.
+
+        Returns:
+            The reasoning parser class, or None when the name is empty or
+            None.
+
+        Raises:
+            TypeError: If the lookup fails or yields no parser class.
+        """
+        from vllm.reasoning import ReasoningParserManager
+
+        if not reasoning_parser_name:
+            return None
+
+        not_registered = f"{reasoning_parser_name=} has not been registered"
+        try:
+            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
+        except Exception as e:
+            raise TypeError(not_registered) from e
+        # Checked explicitly rather than with `assert`, which is stripped
+        # under `python -O` and would let a None result through.
+        if parser is None:
+            raise TypeError(not_registered)
+        return parser
+
+    @classmethod
+    def get_parser(
+        cls,
+        tool_parser_name: str | None = None,
+        reasoning_parser_name: str | None = None,
+        enable_auto_tools: bool = False,
+        model_name: str | None = None,
+    ) -> type[Parser] | None:
+        """
+        Resolve the Parser class used for reasoning and tool-call parsing.
+
+        Resolution order:
+
+        1. If both names are identical, look for a unified parser registered
+           under that name.
+        2. Otherwise look for a unified parser registered under the tool
+           parser name, then under the reasoning parser name.
+        3. Otherwise resolve the individual reasoning and tool parser classes
+           and return ``_WrappedParser`` configured with them.
+
+        Args:
+            tool_parser_name: The name of the tool parser.
+            reasoning_parser_name: The name of the reasoning parser.
+            enable_auto_tools: Whether auto tool choice is enabled.
+            model_name: The model name for parser-specific warnings.
+
+        Returns:
+            A Parser class, or None if no parser name is given or neither
+            individual parser resolves to a class.
+
+        Raises:
+            TypeError: Propagated from ``get_reasoning_parser`` or
+                ``get_tool_parser`` when a named parser cannot be resolved.
+        """
+        from vllm.parser.abstract_parser import _WrappedParser
+
+        if not (tool_parser_name or reasoning_parser_name):
+            return None
+
+        # Strategy 1: identical names -> try a unified parser with that name.
+        if tool_parser_name and reasoning_parser_name == tool_parser_name:
+            try:
+                unified_cls = cls.get_parser_internal(tool_parser_name)
+            except KeyError:
+                # No unified parser under this name; try the next strategy.
+                pass
+            else:
+                logger.info(
+                    "Using unified parser '%s' for both reasoning and tool parsing.",
+                    tool_parser_name,
+                )
+                return unified_cls
+
+        # Strategy 2: try each non-empty name in turn, tool parser name first.
+        for candidate in filter(None, (tool_parser_name, reasoning_parser_name)):
+            try:
+                unified_cls = cls.get_parser_internal(candidate)
+            except KeyError:
+                continue
+            logger.info(
+                "Using unified parser '%s' for reasoning and tool parsing.",
+                candidate,
+            )
+            return unified_cls
+
+        # Strategy 3: fall back to wrapping the individual parser classes.
+        tool_cls = cls.get_tool_parser(tool_parser_name, enable_auto_tools, model_name)
+        reasoning_cls = cls.get_reasoning_parser(reasoning_parser_name)
+
+        if tool_cls is None and reasoning_cls is None:
+            return None
+
+        # NOTE(review): these assignments mutate class attributes on the
+        # shared _WrappedParser, so a later call with different parsers
+        # overwrites the configuration from an earlier call. Confirm callers
+        # only ever need a single configuration per process.
+        _WrappedParser.reasoning_parser_cls = reasoning_cls
+        _WrappedParser.tool_parser_cls = tool_cls
+
+        return _WrappedParser