[responsesAPI] prioritize content over summary in reasoning item input (#36516)

Signed-off-by: Andrew Xia <axia@meta.com>
Signed-off-by: Andrew Xia <mitandrewxia@gmail.com>
Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
Andrew Xia
2026-03-13 18:20:30 -07:00
committed by GitHub
parent b41aa264f9
commit f680dc1b39
2 changed files with 192 additions and 3 deletions

View File

@@ -1,6 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import patch
import pytest
from openai.types.chat import ChatCompletionMessageParam
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
@@ -166,6 +168,184 @@ class TestResponsesUtils:
assert formatted_item["content"] == "dongyi"
class TestReasoningItemContentPriority:
    """Check that reasoning items prefer ``content`` over ``summary``.

    Every test builds a ``ResponseReasoningItem``, passes it through
    ``_construct_single_message_from_response_item`` and inspects the
    ``"reasoning"`` field of the returned message. Tests that exercise the
    summary fallback also check the warning sent to the patched module logger.
    """

    @staticmethod
    def _make_item(item_id, summary_texts, content_texts, encrypted_content=None):
        """Build a reasoning item from plain strings.

        Args:
            item_id: Value for the item's ``id`` field.
            summary_texts: Texts wrapped, in order, into ``summary_text``
                entries. An empty sequence yields an empty summary list.
            content_texts: Texts wrapped, in order, into ``reasoning_text``
                entries. ``None`` leaves ``content`` as ``None``; an empty
                list is forwarded as an empty list.
            encrypted_content: Forwarded unchanged to the item.
        """
        summary_parts = [
            Summary(text=text, type="summary_text") for text in summary_texts
        ]
        if content_texts is None:
            content_parts = None
        else:
            content_parts = [
                Content(text=text, type="reasoning_text") for text in content_texts
            ]
        return ResponseReasoningItem(
            id=item_id,
            summary=summary_parts,
            type="reasoning",
            content=content_parts,
            encrypted_content=encrypted_content,
            status=None,
        )

    @staticmethod
    def _assert_summary_warning_logged(mock_logger):
        """Assert exactly one warning was logged and it names the fallback."""
        mock_logger.warning.assert_called_once()
        positional_args = mock_logger.warning.call_args[0]
        assert "summary text as reasoning content" in positional_args[0]

    def test_content_preferred_over_summary(self):
        """Content wins when the item carries both content and summary."""
        reasoning_item = self._make_item(
            "reasoning_1",
            summary_texts=["This is a summary"],
            content_texts=["This is the actual content"],
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == "This is the actual content"

    def test_content_only(self):
        """Content is used when the summary list is empty."""
        reasoning_item = self._make_item(
            "reasoning_2",
            summary_texts=[],
            content_texts=["Content without summary"],
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == "Content without summary"

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_fallback_when_no_content(self, mock_logger):
        """With ``content=None`` the summary is used and a warning is logged."""
        reasoning_item = self._make_item(
            "reasoning_3",
            summary_texts=["Fallback summary text"],
            content_texts=None,
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == "Fallback summary text"
        self._assert_summary_warning_logged(mock_logger)

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_fallback_when_content_empty(self, mock_logger):
        """With ``content=[]`` the summary is used and a warning is logged."""
        reasoning_item = self._make_item(
            "reasoning_4",
            summary_texts=["Summary when content empty"],
            content_texts=[],
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == "Summary when content empty"
        self._assert_summary_warning_logged(mock_logger)

    def test_neither_content_nor_summary(self):
        """Reasoning is the empty string when both sources are missing."""
        reasoning_item = self._make_item(
            "reasoning_5",
            summary_texts=[],
            content_texts=None,
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == ""

    def test_encrypted_content_raises(self):
        """An item with encrypted content is rejected with ``ValueError``."""
        reasoning_item = self._make_item(
            "reasoning_6",
            summary_texts=["Some summary"],
            content_texts=["Some content"],
            encrypted_content="ENCRYPTED",
        )
        with pytest.raises(ValueError):
            _construct_single_message_from_response_item(reasoning_item)

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_with_multiple_entries_uses_first(self, mock_logger):
        """Only the first summary entry is used when several are present."""
        reasoning_item = self._make_item(
            "reasoning_7",
            summary_texts=["First summary", "Second summary"],
            content_texts=None,
        )
        message = _construct_single_message_from_response_item(reasoning_item)
        assert message["reasoning"] == "First summary"
        self._assert_summary_warning_logged(mock_logger)

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_no_warning_when_content_used(self, mock_logger):
        """No warning is logged when content supplies the reasoning text."""
        reasoning_item = self._make_item(
            "reasoning_8",
            summary_texts=["Summary text"],
            content_texts=["Content text"],
        )
        _construct_single_message_from_response_item(reasoning_item)
        mock_logger.warning.assert_not_called()
class TestShouldContinueFinalMessage:
"""Tests for should_continue_final_message function.

View File

@@ -24,6 +24,9 @@ from vllm import envs
from vllm.entrypoints.constants import MCP_PREFIX
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionMessageParam
from vllm.entrypoints.openai.responses.protocol import ResponseInputOutputItem
from vllm.logger import init_logger
logger = init_logger(__name__)
def should_continue_final_message(
@@ -191,10 +194,16 @@ def _construct_single_message_from_response_item(
reasoning_content = ""
if item.encrypted_content:
raise ValueError("Encrypted content is not supported.")
if len(item.summary) == 1:
reasoning_content = item.summary[0].text
elif item.content and len(item.content) == 1:
elif item.content and len(item.content) >= 1:
reasoning_content = item.content[0].text
elif len(item.summary) >= 1:
reasoning_content = item.summary[0].text
logger.warning(
"Using summary text as reasoning content for item %s. "
"Please use content instead of summary for "
"reasoning items.",
item.id,
)
return {
"role": "assistant",
"reasoning": reasoning_content,