[BugFix] Fix engine crash caused by chat tools + response_format (#32127)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -151,3 +151,45 @@ async def test_chat_completion_with_tools(
|
|||||||
assert chunk.choices[0].finish_reason != "tool_calls"
|
assert chunk.choices[0].finish_reason != "tool_calls"
|
||||||
assert len(chunks)
|
assert len(chunks)
|
||||||
assert "".join(chunks) == output_text
|
assert "".join(chunks) == output_text
|
||||||
|
|
||||||
|
|
||||||
|
# Regression test for https://github.com/vllm-project/vllm/issues/32006
|
||||||
|
# Engine crash when combining response_format: json_object with
|
||||||
|
# tool_choice: required
|
||||||
|
@pytest.mark.asyncio
@pytest.mark.timeout(120)
async def test_response_format_with_tool_choice_required(
    client: openai.AsyncOpenAI, server_config: ServerConfig
):
    """
    Verify that a chat request pairing response_format: json_object with
    tool_choice: required completes and produces tool calls.

    Prior to the fix, such a request failed validation with
    "You can only use one kind of structured outputs constraint but multiple
    are specified", since both json_object and json (from the tool schema)
    ended up set in StructuredOutputsParams.
    """
    # Use the first model the server advertises.
    available_models = await client.models.list()
    model_name: str = available_models.data[0].id

    prompt_messages = ensure_system_prompt(
        [{"role": "user", "content": "What is the weather in Dallas, Texas?"}],
        server_config,
    )

    # Sending this exact parameter combination used to crash the engine.
    chat_completion = await client.chat.completions.create(
        messages=prompt_messages,
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        tools=[WEATHER_TOOL],
        tool_choice="required",
        response_format={"type": "json_object"},
    )

    # With the fix, response_format is cleared whenever tool_choice forces
    # tool calling, so the request is expected to finish with tool calls.
    first_choice = chat_completion.choices[0]
    assert first_choice.finish_reason == "tool_calls"

    produced_calls = first_choice.message.tool_calls
    assert produced_calls is not None
    assert len(produced_calls) > 0
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ class ToolParser:
|
|||||||
# tool_choice: "Forced Function" or "required" will override
|
# tool_choice: "Forced Function" or "required" will override
|
||||||
# structured output json settings to make tool calling work correctly
|
# structured output json settings to make tool calling work correctly
|
||||||
request.structured_outputs.json = json_schema_from_tool
|
request.structured_outputs.json = json_schema_from_tool
|
||||||
|
request.response_format = None
|
||||||
if isinstance(request, ResponsesRequest):
|
if isinstance(request, ResponsesRequest):
|
||||||
request.text = ResponseTextConfig()
|
request.text = ResponseTextConfig()
|
||||||
request.text.format = ResponseFormatTextJSONSchemaConfig(
|
request.text.format = ResponseFormatTextJSONSchemaConfig(
|
||||||
|
|||||||
@@ -370,6 +370,10 @@ class InputProcessor:
|
|||||||
# Remember that this backend was set automatically
|
# Remember that this backend was set automatically
|
||||||
params.structured_outputs._backend_was_auto = True
|
params.structured_outputs._backend_was_auto = True
|
||||||
|
|
||||||
|
# Run post-init validation. This is also important to ensure subsequent
|
||||||
|
# roundtrip serialization/deserialization won't fail.
|
||||||
|
params.structured_outputs.__post_init__()
|
||||||
|
|
||||||
def _maybe_build_mm_uuids(
|
def _maybe_build_mm_uuids(
|
||||||
self,
|
self,
|
||||||
request_id: str,
|
request_id: str,
|
||||||
|
|||||||
Reference in New Issue
Block a user