[BugFix] Fix engine crash caused by chat tools + response_format (#32127)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-01-12 18:33:14 -08:00
committed by GitHub
parent 9273a427b5
commit c6bb5b5603
3 changed files with 47 additions and 0 deletions

View File

@@ -151,3 +151,45 @@ async def test_chat_completion_with_tools(
    assert chunk.choices[0].finish_reason != "tool_calls"
    assert len(chunks)
    assert "".join(chunks) == output_text
# Regression test for https://github.com/vllm-project/vllm/issues/32006
# Engine crash when combining response_format: json_object with
# tool_choice: required
@pytest.mark.asyncio
@pytest.mark.timeout(120)
async def test_response_format_with_tool_choice_required(
    client: openai.AsyncOpenAI, server_config: ServerConfig
):
    """
    Verify that combining ``response_format: json_object`` with
    ``tool_choice: required`` completes successfully instead of crashing
    the engine.

    Regression coverage for https://github.com/vllm-project/vllm/issues/32006:
    before the fix, both the json_object constraint and the tool schema's
    json constraint were set in StructuredOutputsParams, producing the
    validation error "You can only use one kind of structured outputs
    constraint but multiple are specified".
    """
    model_list = await client.models.list()
    served_model: str = model_list.data[0].id

    # This parameter combination previously brought the engine down.
    completion = await client.chat.completions.create(
        messages=ensure_system_prompt(
            [{"role": "user", "content": "What is the weather in Dallas, Texas?"}],
            server_config,
        ),
        temperature=0,
        max_completion_tokens=150,
        model=served_model,
        tools=[WEATHER_TOOL],
        tool_choice="required",
        response_format={"type": "json_object"},
    )

    # With the fix, response_format is dropped once tool calling is forced,
    # so the request succeeds and yields tool calls.
    first_choice = completion.choices[0]
    assert first_choice.finish_reason == "tool_calls"
    tool_calls = first_choice.message.tool_calls
    assert tool_calls is not None
    assert len(tool_calls) > 0

View File

@@ -67,6 +67,7 @@ class ToolParser:
    # tool_choice: "Forced Function" or "required" will override
    # structured output json settings to make tool calling work correctly
    request.structured_outputs.json = json_schema_from_tool
    request.response_format = None
    if isinstance(request, ResponsesRequest):
        request.text = ResponseTextConfig()
        request.text.format = ResponseFormatTextJSONSchemaConfig(

View File

@@ -370,6 +370,10 @@ class InputProcessor:
        # Remember that this backend was set automatically
        params.structured_outputs._backend_was_auto = True
# Run post-init validation. This is also important to ensure subsequent
# roundtrip serialization/deserialization won't fail.
params.structured_outputs.__post_init__()
    def _maybe_build_mm_uuids(
        self,
        request_id: str,