[bugfix] Fix online serving crash when text type response_format is received (#26822)

Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
Signed-off-by: j0shuajun <59368606+j0shuajun@users.noreply.github.com>
Co-authored-by: j0shuajun <59368606+j0shuajun@users.noreply.github.com>
This commit is contained in:
cjackal
2026-01-16 13:23:54 +09:00
committed by GitHub
parent 5de6dd0662
commit 35bf5d08e8
6 changed files with 68 additions and 26 deletions

View File

@@ -671,6 +671,25 @@ async def test_response_format_json_schema(client: openai.AsyncOpenAI):
assert loaded == {"result": 2}, loaded
@pytest.mark.asyncio
async def test_response_format_text(client: openai.AsyncOpenAI):
    """Serving must handle `response_format={"type": "text"}` without crashing.

    The request is issued twice on purpose: the crash this guards against
    only surfaced once server-side structured-output state had been touched
    by an earlier request, so a single call would not catch a regression.
    """
    for _attempt in range(2):
        completion = await client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": "what is 1+1?"}],
            max_completion_tokens=10,
            response_format={"type": "text"},
        )
        # Any non-empty generation is acceptable; we only assert the server
        # produced a message instead of erroring out.
        answer = completion.choices[0].message.content
        assert answer is not None
@pytest.mark.asyncio
async def test_extra_fields_allowed(client: openai.AsyncOpenAI):
resp = await client.chat.completions.create(

View File

@@ -5,6 +5,7 @@
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
import json
import time
from dataclasses import replace
from typing import Annotated, Any, ClassVar, Literal
import torch
@@ -417,18 +418,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
response_format = self.response_format
if response_format is not None:
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if self.structured_outputs is None:
self.structured_outputs = StructuredOutputsParams()
structured_outputs_kwargs = dict[str, Any]()
# Set structured output params for response format
if response_format.type == "json_object":
self.structured_outputs.json_object = True
structured_outputs_kwargs["json_object"] = True
elif response_format.type == "json_schema":
json_schema = response_format.json_schema
assert json_schema is not None
self.structured_outputs.json = json_schema.json_schema
structured_outputs_kwargs["json"] = json_schema.json_schema
elif response_format.type == "structural_tag":
structural_tag = response_format
assert structural_tag is not None and isinstance(
@@ -439,7 +437,16 @@ class ChatCompletionRequest(OpenAIBaseModel):
),
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj)
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if len(structured_outputs_kwargs) > 0:
self.structured_outputs = (
StructuredOutputsParams(**structured_outputs_kwargs)
if self.structured_outputs is None
else replace(self.structured_outputs, **structured_outputs_kwargs)
)
extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:

View File

@@ -5,6 +5,7 @@
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
import json
import time
from dataclasses import replace
from typing import Annotated, Any, Literal
import torch
@@ -247,18 +248,15 @@ class CompletionRequest(OpenAIBaseModel):
response_format = self.response_format
if response_format is not None:
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if self.structured_outputs is None:
self.structured_outputs = StructuredOutputsParams()
structured_outputs_kwargs = dict[str, Any]()
# Set structured output params for response format
if response_format.type == "json_object":
self.structured_outputs.json_object = True
structured_outputs_kwargs["json_object"] = True
elif response_format.type == "json_schema":
json_schema = response_format.json_schema
assert json_schema is not None
self.structured_outputs.json = json_schema.json_schema
structured_outputs_kwargs["json"] = json_schema.json_schema
elif response_format.type == "structural_tag":
structural_tag = response_format
assert structural_tag is not None and isinstance(
@@ -269,7 +267,16 @@ class CompletionRequest(OpenAIBaseModel):
),
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj)
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if len(structured_outputs_kwargs) > 0:
self.structured_outputs = (
StructuredOutputsParams(**structured_outputs_kwargs)
if self.structured_outputs is None
else replace(self.structured_outputs, **structured_outputs_kwargs)
)
extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:

View File

@@ -9,7 +9,7 @@ from collections import deque
from collections.abc import AsyncGenerator, AsyncIterator, Callable, Sequence
from contextlib import AsyncExitStack
from copy import copy
from dataclasses import dataclass
from dataclasses import dataclass, replace
from http import HTTPStatus
from typing import Final
@@ -467,15 +467,18 @@ class OpenAIServingResponses(OpenAIServing):
if self.reasoning_parser is not None:
reasoning_parser = self.reasoning_parser(tokenizer)
if sampling_params.structured_outputs is None:
sampling_params.structured_outputs = StructuredOutputsParams()
struct_out = sampling_params.structured_outputs
if struct_out.all_non_structural_tag_constraints_none():
sampling_params.structured_outputs.structural_tag = (
reasoning_parser.prepare_structured_tag(
sampling_params.structured_outputs.structural_tag,
self.tool_server,
)
if (
isinstance(
struct_out := sampling_params.structured_outputs,
StructuredOutputsParams,
)
and struct_out.all_non_structural_tag_constraints_none()
):
sampling_params.structured_outputs = replace(
struct_out,
structural_tag=reasoning_parser.prepare_structured_tag(
struct_out.structural_tag, self.tool_server
),
)
generator = self._generate_with_builtin_tools(
request_id=request.request_id,

View File

@@ -67,6 +67,11 @@ class StructuredOutputsParams:
"You can only use one kind of structured outputs constraint "
f"but multiple are specified: {self.__dict__}"
)
if count < 1:
raise ValueError(
"You must use one kind of structured outputs constraint "
f"but none are specified: {self.__dict__}"
)
def all_constraints_none(self) -> bool:
"""

View File

@@ -65,10 +65,11 @@ class ToolParser:
# Set structured output params for tool calling
if json_schema_from_tool is not None:
if isinstance(request, ChatCompletionRequest):
request.structured_outputs = StructuredOutputsParams()
# tool_choice: "Forced Function" or "required" will override
# structured output json settings to make tool calling work correctly
request.structured_outputs.json = json_schema_from_tool
request.structured_outputs = StructuredOutputsParams(
json=json_schema_from_tool
)
request.response_format = None
if isinstance(request, ResponsesRequest):
request.text = ResponseTextConfig()