From c6bb5b56030df628697bcccad551276f81d2019e Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Mon, 12 Jan 2026 18:33:14 -0800 Subject: [PATCH] [BugFix] Fix engine crash caused by chat tools + response_format (#32127) Signed-off-by: Nick Hill --- tests/tool_use/test_chat_completions.py | 42 +++++++++++++++++++++++ vllm/tool_parsers/abstract_tool_parser.py | 1 + vllm/v1/engine/input_processor.py | 4 +++ 3 files changed, 47 insertions(+) diff --git a/tests/tool_use/test_chat_completions.py b/tests/tool_use/test_chat_completions.py index 425d38799..07b7933f6 100644 --- a/tests/tool_use/test_chat_completions.py +++ b/tests/tool_use/test_chat_completions.py @@ -151,3 +151,45 @@ async def test_chat_completion_with_tools( assert chunk.choices[0].finish_reason != "tool_calls" assert len(chunks) assert "".join(chunks) == output_text + + +# Regression test for https://github.com/vllm-project/vllm/issues/32006 +# Engine crash when combining response_format: json_object with +# tool_choice: required +@pytest.mark.asyncio +@pytest.mark.timeout(120) +async def test_response_format_with_tool_choice_required( + client: openai.AsyncOpenAI, server_config: ServerConfig +): + """ + Test that combining response_format: json_object with tool_choice: required + doesn't crash the engine. + + Before the fix, this would cause a validation error: + "You can only use one kind of structured outputs constraint but multiple + are specified" because both json_object and json (from tool schema) would + be set in StructuredOutputsParams.
+ """ + models = await client.models.list() + model_name: str = models.data[0].id + + # This combination previously crashed the engine + chat_completion = await client.chat.completions.create( + messages=ensure_system_prompt( + [{"role": "user", "content": "What is the weather in Dallas, Texas?"}], + server_config, + ), + temperature=0, + max_completion_tokens=150, + model=model_name, + tools=[WEATHER_TOOL], + tool_choice="required", + response_format={"type": "json_object"}, + ) + + # The fix clears response_format when tool_choice forces tool calling, + # so the request should complete successfully with tool calls + choice = chat_completion.choices[0] + assert choice.finish_reason == "tool_calls" + assert choice.message.tool_calls is not None + assert len(choice.message.tool_calls) > 0 diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index e2ccb1dad..b7cac3454 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -67,6 +67,7 @@ class ToolParser: # tool_choice: "Forced Function" or "required" will override # structured output json settings to make tool calling work correctly request.structured_outputs.json = json_schema_from_tool + request.response_format = None if isinstance(request, ResponsesRequest): request.text = ResponseTextConfig() request.text.format = ResponseFormatTextJSONSchemaConfig( diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py index 573047e20..f9a77f581 100644 --- a/vllm/v1/engine/input_processor.py +++ b/vllm/v1/engine/input_processor.py @@ -370,6 +370,10 @@ class InputProcessor: # Remember that this backend was set automatically params.structured_outputs._backend_was_auto = True + # Run post-init validation. This is also important to ensure subsequent + # roundtrip serialization/deserialization won't fail. + params.structured_outputs.__post_init__() + def _maybe_build_mm_uuids( self, request_id: str,