[1/N] Initial Implementation of Parser for ResponsesAPI (#32712)
Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
@@ -36,6 +36,7 @@ class MockHFConfig:
|
||||
class MockModelConfig:
|
||||
task = "generate"
|
||||
runner_type = "generate"
|
||||
model = MODEL_NAME
|
||||
tokenizer = MODEL_NAME
|
||||
trust_remote_code = False
|
||||
tokenizer_mode = "auto"
|
||||
|
||||
@@ -36,6 +36,7 @@ class MockHFConfig:
|
||||
class MockModelConfig:
|
||||
task = "generate"
|
||||
runner_type = "generate"
|
||||
model = MODEL_NAME
|
||||
tokenizer = MODEL_NAME
|
||||
trust_remote_code = False
|
||||
tokenizer_mode = "auto"
|
||||
|
||||
@@ -511,6 +511,7 @@ class MockHFConfig:
|
||||
class MockModelConfig:
|
||||
task = "generate"
|
||||
runner_type = "generate"
|
||||
model = MODEL_NAME
|
||||
tokenizer = MODEL_NAME
|
||||
trust_remote_code = False
|
||||
tokenizer_mode = "auto"
|
||||
|
||||
@@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logprobs import Logprob
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
from vllm.parser import ParserManager
|
||||
from vllm.sampling_params import BeamSearchParams, SamplingParams
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers.mistral import (
|
||||
@@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing):
|
||||
self.logits_processors = self.model_config.logits_processors
|
||||
|
||||
# set up reasoning parser
|
||||
self.reasoning_parser = self._get_reasoning_parser(
|
||||
self.reasoning_parser = ParserManager.get_reasoning_parser(
|
||||
reasoning_parser_name=reasoning_parser
|
||||
)
|
||||
# set up tool use
|
||||
self.enable_auto_tools: bool = enable_auto_tools
|
||||
self.tool_parser = self._get_tool_parser(
|
||||
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
|
||||
self.tool_parser = ParserManager.get_tool_parser(
|
||||
tool_parser_name=tool_parser,
|
||||
enable_auto_tools=enable_auto_tools,
|
||||
model_name=self.model_config.model,
|
||||
)
|
||||
self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none
|
||||
|
||||
|
||||
@@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal import MultiModalDataDict
|
||||
from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
|
||||
from vllm.sampling_params import BeamSearchParams, SamplingParams
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers import ToolParser, ToolParserManager
|
||||
from vllm.tool_parsers import ToolParser
|
||||
from vllm.tracing import (
|
||||
contains_trace_headers,
|
||||
extract_trace_headers,
|
||||
@@ -246,46 +245,6 @@ class OpenAIServing:
|
||||
self.model_config = self.models.model_config
|
||||
self.max_model_len = self.model_config.max_model_len
|
||||
|
||||
def _get_tool_parser(
|
||||
self, tool_parser_name: str | None = None, enable_auto_tools: bool = False
|
||||
) -> Callable[[TokenizerLike], ToolParser] | None:
|
||||
"""Get the tool parser based on the name."""
|
||||
parser = None
|
||||
if not enable_auto_tools or tool_parser_name is None:
|
||||
return parser
|
||||
logger.info('"auto" tool choice has been enabled.')
|
||||
|
||||
try:
|
||||
if tool_parser_name == "pythonic" and self.model_config.model.startswith(
|
||||
"meta-llama/Llama-3.2"
|
||||
):
|
||||
logger.warning(
|
||||
"Llama3.2 models may struggle to emit valid pythonic tool calls"
|
||||
)
|
||||
parser = ToolParserManager.get_tool_parser(tool_parser_name)
|
||||
except Exception as e:
|
||||
raise TypeError(
|
||||
"Error: --enable-auto-tool-choice requires "
|
||||
f"tool_parser:'{tool_parser_name}' which has not "
|
||||
"been registered"
|
||||
) from e
|
||||
return parser
|
||||
|
||||
def _get_reasoning_parser(
|
||||
self,
|
||||
reasoning_parser_name: str,
|
||||
) -> Callable[[TokenizerLike], ReasoningParser] | None:
|
||||
"""Get the reasoning parser based on the name."""
|
||||
parser = None
|
||||
if not reasoning_parser_name:
|
||||
return None
|
||||
try:
|
||||
parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
|
||||
assert parser is not None
|
||||
except Exception as e:
|
||||
raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
|
||||
return parser
|
||||
|
||||
async def beam_search(
|
||||
self,
|
||||
prompt: PromptType,
|
||||
|
||||
@@ -123,6 +123,7 @@ from vllm.logger import init_logger
|
||||
from vllm.logprobs import Logprob as SampleLogprob
|
||||
from vllm.logprobs import SampleLogprobs
|
||||
from vllm.outputs import CompletionOutput
|
||||
from vllm.parser import ParserManager
|
||||
from vllm.sampling_params import SamplingParams, StructuredOutputsParams
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.utils import random_uuid
|
||||
@@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
self.chat_template_content_format: Final = chat_template_content_format
|
||||
self.enable_log_outputs = enable_log_outputs
|
||||
|
||||
self.reasoning_parser = self._get_reasoning_parser(
|
||||
reasoning_parser_name=reasoning_parser
|
||||
# Set up the unified parser - either a unified parser or fall back to
|
||||
# separate parsers accessed through the parser interface
|
||||
self.parser = ParserManager.get_parser(
|
||||
tool_parser_name=tool_parser,
|
||||
reasoning_parser_name=reasoning_parser,
|
||||
enable_auto_tools=enable_auto_tools,
|
||||
model_name=self.model_config.model,
|
||||
)
|
||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||
self.enable_force_include_usage = enable_force_include_usage
|
||||
@@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
self.tool_call_id_type = "random"
|
||||
|
||||
self.enable_auto_tools = enable_auto_tools
|
||||
# set up tool use
|
||||
self.tool_parser = self._get_tool_parser(
|
||||
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
|
||||
)
|
||||
# HACK(woosuk): This is a hack. We should use a better store.
|
||||
# FIXME: If enable_store=True, this may cause a memory leak since we
|
||||
# never remove responses from the store.
|
||||
@@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
context = ParsableContext(
|
||||
response_messages=messages,
|
||||
tokenizer=tokenizer,
|
||||
reasoning_parser_cls=self.reasoning_parser,
|
||||
reasoning_parser_cls=self.parser.reasoning_parser_cls
|
||||
if self.parser
|
||||
else None,
|
||||
request=request,
|
||||
tool_parser_cls=self.tool_parser,
|
||||
tool_parser_cls=self.parser.tool_parser_cls
|
||||
if self.parser
|
||||
else None,
|
||||
available_tools=available_tools,
|
||||
chat_template=self.chat_template,
|
||||
chat_template_content_format=self.chat_template_content_format,
|
||||
@@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
else:
|
||||
context = SimpleContext()
|
||||
|
||||
if self.reasoning_parser is not None:
|
||||
reasoning_parser = self.reasoning_parser(tokenizer)
|
||||
if self.parser and self.parser.reasoning_parser_cls is not None:
|
||||
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
|
||||
if (
|
||||
isinstance(
|
||||
struct_out := sampling_params.structured_outputs,
|
||||
@@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
default_template_content_format=self.chat_template_content_format,
|
||||
default_template_kwargs=None,
|
||||
tool_dicts=tool_dicts,
|
||||
tool_parser=self.tool_parser,
|
||||
tool_parser=self.parser.tool_parser_cls if self.parser else None,
|
||||
)
|
||||
return messages, engine_prompts
|
||||
|
||||
@@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
final_output: CompletionOutput,
|
||||
tokenizer: TokenizerLike,
|
||||
) -> list[ResponseOutputItem]:
|
||||
if self.reasoning_parser:
|
||||
if self.parser and self.parser.reasoning_parser_cls:
|
||||
try:
|
||||
reasoning_parser = self.reasoning_parser(tokenizer)
|
||||
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
|
||||
except RuntimeError as e:
|
||||
logger.exception("Error in reasoning parser creation.")
|
||||
raise e
|
||||
@@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
tokenizer=tokenizer,
|
||||
content=content,
|
||||
enable_auto_tools=self.enable_auto_tools,
|
||||
tool_parser_cls=self.tool_parser,
|
||||
tool_parser_cls=self.parser.tool_parser_cls if self.parser else None,
|
||||
)
|
||||
|
||||
if content or (self.use_harmony and tool_calls):
|
||||
@@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
current_output_index = 0
|
||||
current_item_id = ""
|
||||
reasoning_parser = None
|
||||
if self.reasoning_parser:
|
||||
reasoning_parser = self.reasoning_parser(tokenizer)
|
||||
if self.parser and self.parser.reasoning_parser_cls:
|
||||
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
|
||||
previous_text = ""
|
||||
previous_token_ids: list[int] = []
|
||||
first_delta_sent = False
|
||||
|
||||
39
vllm/parser/__init__.py
Normal file
39
vllm/parser/__init__.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from vllm.parser.abstract_parser import (
|
||||
DelegatingParser,
|
||||
Parser,
|
||||
_WrappedParser,
|
||||
)
|
||||
from vllm.parser.parser_manager import ParserManager
|
||||
|
||||
__all__ = [
|
||||
"Parser",
|
||||
"DelegatingParser",
|
||||
"ParserManager",
|
||||
"_WrappedParser",
|
||||
]
|
||||
|
||||
# Built-in unified parsers that are imported lazily on first lookup.
# Maps the registered parser name to (module file name, class name); the
# module file name is resolved relative to the ``vllm.parser`` package.
_PARSERS_TO_REGISTER = {
    "minimax_m2": (  # name
        "minimax_m2_parser",  # filename
        "MiniMaxM2Parser",  # class_name
    ),
}


def register_lazy_parsers():
    """Register every entry of ``_PARSERS_TO_REGISTER`` with ParserManager.

    Only the (module path, class name) pair is recorded; the parser module
    itself is not imported here.
    """
    for name, (file_name, class_name) in _PARSERS_TO_REGISTER.items():
        module_path = f"vllm.parser.{file_name}"
        ParserManager.register_lazy_module(name, module_path, class_name)


# The table above is the single source of truth for built-in parsers, so no
# separate hard-coded ParserManager.register_lazy_module(...) call is needed.
register_lazy_parsers()
|
||||
341
vllm/parser/abstract_parser.py
Normal file
341
vllm/parser/abstract_parser.py
Normal file
@@ -0,0 +1,341 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Sequence
|
||||
from functools import cached_property
|
||||
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import (
|
||||
ChatCompletionRequest,
|
||||
)
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
DeltaMessage,
|
||||
ExtractedToolCallInformation,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponsesRequest,
|
||||
)
|
||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers.abstract_tool_parser import ToolParser
|
||||
|
||||
|
||||
class Parser:
    """
    Unified parsing interface combining ReasoningParser and ToolParser.

    A single Parser covers both halves of model-output parsing:

    - reasoning extraction (e.g., chain-of-thought content in <think> tags)
    - tool call extraction (e.g., function calls in XML/JSON format)

    A subclass may implement the abstract methods itself, or assign the
    `reasoning_parser` / `tool_parser` properties and forward to existing
    parser implementations.

    Class Attributes:
        reasoning_parser_cls: The ReasoningParser class in use, for callers
            that need the class rather than an instance.
        tool_parser_cls: The ToolParser class in use, for callers that need
            the class rather than an instance.
    """

    # NOTE: this class does not use ABCMeta, so the @abstractmethod markers
    # below document intent but do not block instantiation.

    # Subclasses that wrap specific parser classes should override these.
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
        """
        Store the tokenizer and start with no underlying parsers attached.

        Args:
            tokenizer: The model's tokenizer, needed for token-based parsing.
        """
        self._tool_parser: ToolParser | None = None
        self._reasoning_parser: ReasoningParser | None = None
        self.model_tokenizer = tokenizer

    @cached_property
    def vocab(self) -> dict[str, int]:
        """Token -> ID mapping, fetched from the tokenizer once and cached."""
        token_to_id = self.model_tokenizer.get_vocab()
        return token_to_id

    @property
    def reasoning_parser(self) -> ReasoningParser | None:
        """The wrapped reasoning parser instance, or None when unset."""
        return self._reasoning_parser

    @reasoning_parser.setter
    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
        self._reasoning_parser = parser

    @property
    def tool_parser(self) -> ToolParser | None:
        """The wrapped tool parser instance, or None when unset."""
        return self._tool_parser

    @tool_parser.setter
    def tool_parser(self, parser: ToolParser | None) -> None:
        self._tool_parser = parser

    # ========== Reasoning Parser Methods ==========

    @abstractmethod
    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Report whether the reasoning section has finished within input_ids.

        Structured engines such as `xgrammar` use this to detect the end of
        the reasoning content in the model output.

        Args:
            input_ids: Token IDs of the model output.

        Returns:
            True if the reasoning content ends in the input_ids.
        """

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """
        Report whether the reasoning section finished during a decode step.

        The default implementation ignores `delta_ids` and defers to
        `is_reasoning_end` on the full output.

        Args:
            input_ids: All model output token IDs so far.
            delta_ids: The tokens computed at the current decode step.

        Returns:
            True if the reasoning content ends in the delta_ids.
        """
        return self.is_reasoning_end(input_ids)

    @abstractmethod
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Return the token IDs that belong to the non-reasoning content.

        For example, everything that follows the </think> tag.

        Args:
            input_ids: Token IDs of the model output.

        Returns:
            The content token IDs.
        """

    @abstractmethod
    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Split a complete model output into reasoning and response content.

        Intended for non-streaming responses, where the whole output is
        available before anything is sent to the client.

        Args:
            model_output: The full model-generated string.
            request: The request that produced the output.

        Returns:
            A (reasoning_content, response_content) tuple.
        """

    @abstractmethod
    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning content from one streaming delta.

        Args:
            previous_text: Text produced by all earlier tokens.
            current_text: Text including the current delta.
            delta_text: Text added by this delta.
            previous_token_ids: Token IDs generated before this delta.
            current_token_ids: All token IDs including this delta.
            delta_token_ids: Token IDs added by this delta.

        Returns:
            A DeltaMessage with reasoning and/or content fields, or None.
        """

    # ========== Tool Parser Methods ==========

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """
        Hook for tweaking request parameters before tool calling.

        The base implementation returns the request untouched; subclasses
        may override it (e.g., to set structured output schemas for tool
        calling).

        Args:
            request: The original request.

        Returns:
            The adjusted request.
        """
        return request

    @abstractmethod
    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model output.

        Intended for non-streaming responses.

        Args:
            model_output: The full model-generated string.
            request: The request that produced the output.

        Returns:
            ExtractedToolCallInformation describing the tool calls.
        """

    @abstractmethod
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from one streaming delta.

        Args:
            previous_text: Text produced by all earlier tokens.
            current_text: Text including the current delta.
            delta_text: Text added by this delta.
            previous_token_ids: Token IDs generated before this delta.
            current_token_ids: All token IDs including this delta.
            delta_token_ids: Token IDs added by this delta.
            request: The request object.

        Returns:
            A DeltaMessage with the tool_calls field set, or None.
        """
|
||||
|
||||
|
||||
class DelegatingParser(Parser):
    """
    A Parser implementation that delegates to separate ReasoningParser and
    ToolParser instances.

    This is the recommended base class for creating model-specific parsers
    that combine existing reasoning and tool parser implementations.
    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
    in their `__init__` method.

    If either parser is None, the corresponding methods return default
    values (no reasoning extraction, no tool calls, request unchanged).
    """

    # ========== Reasoning Parser Methods ==========

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Delegate to the reasoning parser; True when none is configured."""
        if self._reasoning_parser is None:
            # No reasoning parser means there is no reasoning section to
            # wait for, so the output is treated as already past it.
            return True
        return self._reasoning_parser.is_reasoning_end(input_ids)

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """Delegate to the reasoning parser; True when none is configured."""
        if self._reasoning_parser is None:
            return True
        return self._reasoning_parser.is_reasoning_end_streaming(
            input_ids, delta_ids
        )

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Delegate to the reasoning parser; all IDs are content when none."""
        if self._reasoning_parser is None:
            return input_ids
        return self._reasoning_parser.extract_content_ids(input_ids)

    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """Delegate to the reasoning parser.

        Returns:
            (reasoning_content, response_content); without a reasoning
            parser the whole output is returned as response content.
        """
        if self._reasoning_parser is None:
            return None, model_output
        return self._reasoning_parser.extract_reasoning(model_output, request)

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """Delegate to the reasoning parser.

        Without a reasoning parser the delta text is passed through as
        plain content.
        """
        if self._reasoning_parser is None:
            return DeltaMessage(content=delta_text)
        return self._reasoning_parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

    # ========== Tool Parser Methods ==========

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Delegate to the tool parser; the request is unchanged when none."""
        if self._tool_parser is None:
            return request
        return self._tool_parser.adjust_request(request)

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Delegate to the tool parser.

        Without a tool parser, reports no tool calls and returns the whole
        output as content.
        """
        if self._tool_parser is None:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )
        return self._tool_parser.extract_tool_calls(model_output, request)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Delegate to the tool parser; None when none is configured."""
        if self._tool_parser is None:
            return None
        return self._tool_parser.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request,
        )
|
||||
|
||||
|
||||
class _WrappedParser(DelegatingParser):
    """
    DelegatingParser whose underlying parsers are built from class attributes.

    It exists so that a parser wrapping individual ReasoningParser and
    ToolParser classes can be assembled dynamically: assign the classes to
    `reasoning_parser_cls` and `tool_parser_cls` first, then instantiate.

    Usage:
        _WrappedParser.reasoning_parser_cls = MyReasoningParser
        _WrappedParser.tool_parser_cls = MyToolParser
        parser = _WrappedParser(tokenizer)
    """

    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)
        owner = self.__class__

        # Build each underlying parser only when its class is configured;
        # otherwise the None set by the base __init__ is kept.
        reasoning_factory = owner.reasoning_parser_cls
        if reasoning_factory is not None:
            self._reasoning_parser = reasoning_factory(tokenizer)

        tool_factory = owner.tool_parser_cls
        if tool_factory is not None:
            self._tool_parser = tool_factory(tokenizer)
|
||||
52
vllm/parser/minimax_m2_parser.py
Normal file
52
vllm/parser/minimax_m2_parser.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""
|
||||
MiniMax M2 Parser - A unified parser for MiniMax M2 models.
|
||||
|
||||
This parser combines the existing MiniMaxM2ReasoningParser and
|
||||
MinimaxM2ToolParser into a single unified interface by delegating
|
||||
to those implementations.
|
||||
"""
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.parser.abstract_parser import DelegatingParser
|
||||
from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class MiniMaxM2Parser(DelegatingParser):
    """
    Unified reasoning + tool-call parser for MiniMax M2 models.

    All parsing work is forwarded to the existing implementations:
    - MiniMaxM2ReasoningParser for reasoning extraction
    - MinimaxM2ToolParser for tool call parsing

    Two MiniMax M2 behaviors these handle:
    1. Reasoning: the model emits no <think> start token, only the </think>
       end token. Everything before </think> is reasoning; everything after
       is the actual response.
    2. Tool Calls: the model uses <minimax:tool_call>...</minimax:tool_call>
       tags with <invoke name="...">...</invoke> and
       <parameter name="...">...</parameter> syntax.
    """

    # Exposed at class level for callers that need the classes themselves.
    tool_parser_cls = MinimaxM2ToolParser
    reasoning_parser_cls = MiniMaxM2ReasoningParser

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)

        # Create the two delegates the DelegatingParser methods forward to.
        reasoning_delegate = MiniMaxM2ReasoningParser(tokenizer)
        self._reasoning_parser = reasoning_delegate
        tool_delegate = MinimaxM2ToolParser(tokenizer)
        self._tool_parser = tool_delegate

        parser_name = self.__class__.__name__
        logger.debug("vLLM Successfully initialized parser %s!", parser_name)
|
||||
308
vllm/parser/parser_manager.py
Normal file
308
vllm/parser/parser_manager.py
Normal file
@@ -0,0 +1,308 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils.collection_utils import is_list_of
|
||||
from vllm.utils.import_utils import import_from_path
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.parser.abstract_parser import Parser
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.tool_parsers import ToolParser
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class ParserManager:
    """
    Central registry for Parser implementations.

    Supports two registration modes:
      - Eager registration via `register_module` (with `module=` given)
      - Lazy registration via `register_lazy_module` (also used by the
        decorator form of `register_module`)

    Both registries are class-level dicts, i.e. shared process-wide.
    """

    parsers: dict[str, type[Parser]] = {}
    lazy_parsers: dict[str, tuple[str, str]] = {}  # name -> (module_path, class_name)

    @classmethod
    def get_parser_internal(cls, name: str) -> type[Parser]:
        """
        Retrieve a registered or lazily registered Parser class.

        Args:
            name: The registered name of the parser.

        Returns:
            The Parser class.

        Raises:
            KeyError: If no parser is found under the given name.
        """
        if name in cls.parsers:
            return cls.parsers[name]

        if name in cls.lazy_parsers:
            return cls._load_lazy_parser(name)

        registered = ", ".join(cls.list_registered())
        raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}")

    @classmethod
    def _load_lazy_parser(cls, name: str) -> type[Parser]:
        """Import a lazily registered parser and cache it in `parsers`.

        Any failure (import error, missing attribute, not a Parser subclass)
        is logged and re-raised unchanged.
        """
        from vllm.parser.abstract_parser import Parser

        module_path, class_name = cls.lazy_parsers[name]
        try:
            mod = importlib.import_module(module_path)
            parser_cls = getattr(mod, class_name)
            if not issubclass(parser_cls, Parser):
                raise TypeError(
                    f"{class_name} in {module_path} is not a Parser subclass."
                )
            cls.parsers[name] = parser_cls  # cache
            return parser_cls
        except Exception as e:
            logger.exception(
                "Failed to import lazy parser '%s' from %s: %s",
                name,
                module_path,
                e,
            )
            raise

    @classmethod
    def _register_module(
        cls,
        module: type[Parser],
        module_name: str | list[str] | None = None,
        force: bool = True,
    ) -> None:
        """Register a Parser class immediately.

        Args:
            module: The Parser subclass to register.
            module_name: One name, several names, or None to use the class
                name.
            force: When False, an already-registered name raises KeyError
                instead of being overwritten.

        Raises:
            TypeError: If `module` is not a Parser subclass or `module_name`
                has an unsupported type.
            KeyError: If `force` is False and a name is already registered.
        """
        from vllm.parser.abstract_parser import Parser

        if not issubclass(module, Parser):
            raise TypeError(
                f"module must be subclass of Parser, but got {type(module)}"
            )

        if module_name is None:
            module_names = [module.__name__]
        elif isinstance(module_name, str):
            module_names = [module_name]
        elif is_list_of(module_name, str):
            module_names = module_name
        else:
            raise TypeError("module_name must be str, list[str], or None.")

        for name in module_names:
            if not force and name in cls.parsers:
                existed = cls.parsers[name]
                raise KeyError(f"{name} is already registered at {existed.__module__}")
            cls.parsers[name] = module

    @classmethod
    def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
        """
        Register a lazy module mapping for delayed import.

        Example:
            ParserManager.register_lazy_module(
                name="minimax_m2",
                module_path="vllm.parser.minimax_m2_parser",
                class_name="MiniMaxM2Parser",
            )
        """
        cls.lazy_parsers[name] = (module_path, class_name)

    @classmethod
    def register_module(
        cls,
        name: str | list[str] | None = None,
        force: bool = True,
        module: type[Parser] | None = None,
    ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
        """
        Register a Parser class.

        Can be used as a decorator or called directly.

        Usage:
            @ParserManager.register_module("my_parser")
            class MyParser(Parser):
                ...

        Or:
            ParserManager.register_module(module=MyParser)

        Raises:
            TypeError: If `force` is not a bool.
        """
        if not isinstance(force, bool):
            raise TypeError(f"force must be a boolean, but got {type(force)}")

        # Immediate registration
        if module is not None:
            cls._register_module(module=module, module_name=name, force=force)
            return module

        # Decorator usage: only the (module path, class name) pair is stored
        # in the lazy registry; `force` is not consulted on this path.
        def _decorator(obj: type[Parser]) -> type[Parser]:
            module_path = obj.__module__
            class_name = obj.__name__

            if isinstance(name, str):
                names = [name]
            elif is_list_of(name, str):
                names = name
            else:
                names = [class_name]

            for n in names:
                cls.lazy_parsers[n] = (module_path, class_name)

            return obj

        return _decorator

    @classmethod
    def list_registered(cls) -> list[str]:
        """Return the sorted names of all eagerly and lazily registered parsers."""
        return sorted(set(cls.parsers) | set(cls.lazy_parsers))

    @classmethod
    def import_parser(cls, plugin_path: str) -> None:
        """Import a user-defined parser from an arbitrary path.

        Best effort: a failing import is logged and not re-raised.
        """
        module_name = os.path.splitext(os.path.basename(plugin_path))[0]
        try:
            import_from_path(module_name, plugin_path)
        except Exception:
            logger.exception(
                "Failed to load module '%s' from %s.", module_name, plugin_path
            )

    @classmethod
    def get_tool_parser(
        cls,
        tool_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[ToolParser] | None:
        """Get the tool parser class based on the name.

        Args:
            tool_parser_name: Registered tool parser name.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: Model name, used only for a model-specific warning.

        Returns:
            The tool parser class, or None when auto tool choice is disabled
            or no name is given.

        Raises:
            TypeError: If the lookup of `tool_parser_name` fails.
        """
        if not enable_auto_tools or tool_parser_name is None:
            return None

        # Imported only once a lookup is actually required.
        from vllm.tool_parsers import ToolParserManager

        logger.info('"auto" tool choice has been enabled.')

        try:
            if (
                tool_parser_name == "pythonic"
                and model_name
                and model_name.startswith("meta-llama/Llama-3.2")
            ):
                logger.warning(
                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
                )
            parser = ToolParserManager.get_tool_parser(tool_parser_name)
        except Exception as e:
            raise TypeError(
                "Error: --enable-auto-tool-choice requires "
                f"tool_parser:'{tool_parser_name}' which has not "
                "been registered"
            ) from e
        return parser

    @classmethod
    def get_reasoning_parser(
        cls,
        reasoning_parser_name: str | None,
    ) -> type[ReasoningParser] | None:
        """Get the reasoning parser class based on the name.

        Returns:
            The reasoning parser class, or None when no name is given.

        Raises:
            TypeError: If the lookup fails or yields None.
        """
        if not reasoning_parser_name:
            return None

        # Imported only once a lookup is actually required.
        from vllm.reasoning import ReasoningParserManager

        try:
            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
            # Explicit check instead of `assert`, which is stripped under -O.
            if parser is None:
                raise KeyError(reasoning_parser_name)
        except Exception as e:
            raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
        return parser

    @classmethod
    def get_parser(
        cls,
        tool_parser_name: str | None = None,
        reasoning_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[Parser] | None:
        """
        Get a unified Parser that handles both reasoning and tool parsing.

        This method checks if a unified Parser exists that can handle both
        reasoning extraction and tool call parsing. If no unified parser
        exists, it creates a DelegatingParser subclass that wraps the
        individual reasoning and tool parsers.

        Args:
            tool_parser_name: The name of the tool parser.
            reasoning_parser_name: The name of the reasoning parser.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: The model name for parser-specific warnings.

        Returns:
            A Parser class, or None if neither parser is specified (or
            neither resolves to a class).

        Raises:
            TypeError: If an individual parser name cannot be resolved.
        """
        if not tool_parser_name and not reasoning_parser_name:
            return None

        # Strategy 1: If both names match, check for a unified parser with that name
        if tool_parser_name and tool_parser_name == reasoning_parser_name:
            try:
                parser = cls.get_parser_internal(tool_parser_name)
                logger.info(
                    "Using unified parser '%s' for both reasoning and tool parsing.",
                    tool_parser_name,
                )
                return parser
            except KeyError:
                pass  # No unified parser with this name

        # Strategy 2: Check for parser with either name
        # NOTE(review): a match on only one of the two names returns the
        # unified parser as-is, without consulting the other name or
        # enable_auto_tools - confirm this is intended.
        for name in (tool_parser_name, reasoning_parser_name):
            if not name:
                continue
            try:
                parser = cls.get_parser_internal(name)
                logger.info(
                    "Using unified parser '%s' for reasoning and tool parsing.",
                    name,
                )
                return parser
            except KeyError:
                pass

        # Strategy 3: Create a DelegatingParser with the individual parser classes
        reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
        tool_parser_cls = cls.get_tool_parser(
            tool_parser_name, enable_auto_tools, model_name
        )

        if reasoning_parser_cls is None and tool_parser_cls is None:
            return None

        from vllm.parser.abstract_parser import _WrappedParser

        # Build a fresh subclass per call instead of assigning onto the
        # shared _WrappedParser class: mutating the shared class would
        # silently reconfigure any class object returned by an earlier call
        # made with different parser names.
        return type(
            _WrappedParser.__name__,
            (_WrappedParser,),
            {
                "__module__": _WrappedParser.__module__,
                "reasoning_parser_cls": reasoning_parser_cls,
                "tool_parser_cls": tool_parser_cls,
            },
        )
|
||||
Reference in New Issue
Block a user