[BugFix] Fix engine crash caused by chat tools + response_format (#32127)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-01-12 18:33:14 -08:00
committed by GitHub
parent 9273a427b5
commit c6bb5b5603
3 changed files with 47 additions and 0 deletions

View File

@@ -151,3 +151,45 @@ async def test_chat_completion_with_tools(
    assert chunk.choices[0].finish_reason != "tool_calls"
    assert len(chunks)
    assert "".join(chunks) == output_text
# Regression test for https://github.com/vllm-project/vllm/issues/32006
# Engine crash when combining response_format: json_object with
# tool_choice: required
@pytest.mark.asyncio
@pytest.mark.timeout(120)
async def test_response_format_with_tool_choice_required(
    client: openai.AsyncOpenAI, server_config: ServerConfig
):
    """
    Verify that combining ``response_format: json_object`` with
    ``tool_choice: required`` completes successfully instead of crashing
    the engine.

    Regression coverage for https://github.com/vllm-project/vllm/issues/32006:
    before the fix, both the json_object constraint and the tool schema's
    json constraint were set in StructuredOutputsParams, producing the
    validation error "You can only use one kind of structured outputs
    constraint but multiple are specified".
    """
    model_list = await client.models.list()
    served_model: str = model_list.data[0].id

    # This parameter combination previously brought the engine down.
    completion = await client.chat.completions.create(
        messages=ensure_system_prompt(
            [{"role": "user", "content": "What is the weather in Dallas, Texas?"}],
            server_config,
        ),
        temperature=0,
        max_completion_tokens=150,
        model=served_model,
        tools=[WEATHER_TOOL],
        tool_choice="required",
        response_format={"type": "json_object"},
    )

    # With the fix, response_format is dropped once tool calling is forced,
    # so the request succeeds and yields tool calls.
    first_choice = completion.choices[0]
    assert first_choice.finish_reason == "tool_calls"
    tool_calls = first_choice.message.tool_calls
    assert tool_calls is not None
    assert len(tool_calls) > 0

View File

@@ -67,6 +67,7 @@ class ToolParser:
    # tool_choice: "Forced Function" or "required" will override
    # structured output json settings to make tool calling work correctly
    request.structured_outputs.json = json_schema_from_tool
    request.response_format = None
    if isinstance(request, ResponsesRequest):
        request.text = ResponseTextConfig()
        request.text.format = ResponseFormatTextJSONSchemaConfig(

View File

@@ -370,6 +370,10 @@ class InputProcessor:
        # Remember that this backend was set automatically
        params.structured_outputs._backend_was_auto = True
# Run post-init validation. This is also important to ensure subsequent
# roundtrip serialization/deserialization won't fail.
params.structured_outputs.__post_init__()
    def _maybe_build_mm_uuids(
        self,
        request_id: str,