diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index dd42a6a56..b2428e97e 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1012,6 +1012,7 @@ class OpenAIServingResponses(OpenAIServing): parser = self.parser(tokenizer) return parser.extract_response_outputs( model_output=final_output.text, + model_output_token_ids=final_output.token_ids, request=request, enable_auto_tools=self.enable_auto_tools, tool_call_id_type=self.tool_call_id_type, diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py index 0c1dda17b..ca8147ea1 100644 --- a/vllm/parser/abstract_parser.py +++ b/vllm/parser/abstract_parser.py @@ -155,7 +155,9 @@ class Parser: @abstractmethod def extract_response_outputs( self, + *, model_output: str, + model_output_token_ids: Sequence[int], request: ResponsesRequest, enable_auto_tools: bool = False, tool_call_id_type: str = "random", @@ -170,6 +172,7 @@ class Parser: Args: model_output: The complete model-generated string. + model_output_token_ids: The token IDs of the model output. request: The request object used to generate the output. enable_auto_tools: Whether to enable automatic tool call parsing. tool_call_id_type: Type of tool call ID generation ("random", etc). @@ -313,7 +316,9 @@ class DelegatingParser(Parser): def extract_response_outputs( self, + *, model_output: str, + model_output_token_ids: Sequence[int], request: ResponsesRequest, enable_auto_tools: bool = False, tool_call_id_type: str = "random",