From 0e95916155a89195e97b8fae8d880c0aa0afc34e Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Tue, 17 Mar 2026 22:31:31 -0700 Subject: [PATCH] [responsesAPI] parser.extract_response_outputs can take in token IDs (#37130) Signed-off-by: Andrew Xia --- vllm/entrypoints/openai/responses/serving.py | 1 + vllm/parser/abstract_parser.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index dd42a6a56..b2428e97e 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1012,6 +1012,7 @@ class OpenAIServingResponses(OpenAIServing): parser = self.parser(tokenizer) return parser.extract_response_outputs( model_output=final_output.text, + model_output_token_ids=final_output.token_ids, request=request, enable_auto_tools=self.enable_auto_tools, tool_call_id_type=self.tool_call_id_type, diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py index 0c1dda17b..ca8147ea1 100644 --- a/vllm/parser/abstract_parser.py +++ b/vllm/parser/abstract_parser.py @@ -155,7 +155,9 @@ class Parser: @abstractmethod def extract_response_outputs( self, + *, model_output: str, + model_output_token_ids: Sequence[int], request: ResponsesRequest, enable_auto_tools: bool = False, tool_call_id_type: str = "random", @@ -170,6 +172,7 @@ class Parser: Args: model_output: The complete model-generated string. + model_output_token_ids: The token IDs of the model output. request: The request object used to generate the output. enable_auto_tools: Whether to enable automatic tool call parsing. tool_call_id_type: Type of tool call ID generation ("random", etc). @@ -313,7 +316,9 @@ class DelegatingParser(Parser): def extract_response_outputs( self, + *, model_output: str, + model_output_token_ids: Sequence[int], request: ResponsesRequest, enable_auto_tools: bool = False, tool_call_id_type: str = "random",