[responsesAPI] prioritize content over summary in reasoning item input (#36516)
Signed-off-by: Andrew Xia <axia@meta.com> Signed-off-by: Andrew Xia <mitandrewxia@gmail.com> Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
|
||||
@@ -166,6 +168,184 @@ class TestResponsesUtils:
|
||||
assert formatted_item["content"] == "dongyi"
|
||||
|
||||
|
||||
class TestReasoningItemContentPriority:
    """Tests that content is prioritized over summary for reasoning items."""

    @staticmethod
    def _reasoning_item(item_id, summaries, contents, encrypted=None):
        """Build a ResponseReasoningItem from plain text parts.

        ``summaries`` is a list of summary strings (may be empty);
        ``contents`` is a list of content strings, an empty list, or None.
        """
        return ResponseReasoningItem(
            id=item_id,
            summary=[Summary(text=text, type="summary_text") for text in summaries],
            type="reasoning",
            content=(
                None
                if contents is None
                else [Content(text=text, type="reasoning_text") for text in contents]
            ),
            encrypted_content=encrypted,
            status=None,
        )

    def test_content_preferred_over_summary(self):
        """When both content and summary are present, content should win."""
        reasoning_item = self._reasoning_item(
            "reasoning_1",
            summaries=["This is a summary"],
            contents=["This is the actual content"],
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == "This is the actual content"

    def test_content_only(self):
        """When only content is present (no summary), content is used."""
        reasoning_item = self._reasoning_item(
            "reasoning_2",
            summaries=[],
            contents=["Content without summary"],
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == "Content without summary"

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_fallback_when_no_content(self, mock_logger):
        """When content is absent, summary is used as fallback with warning."""
        reasoning_item = self._reasoning_item(
            "reasoning_3",
            summaries=["Fallback summary text"],
            contents=None,
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == "Fallback summary text"
        mock_logger.warning.assert_called_once()
        warning_msg = mock_logger.warning.call_args[0][0]
        assert "summary text as reasoning content" in warning_msg

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_fallback_when_content_empty(self, mock_logger):
        """When content is an empty list, summary is used as fallback."""
        reasoning_item = self._reasoning_item(
            "reasoning_4",
            summaries=["Summary when content empty"],
            contents=[],
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == "Summary when content empty"
        mock_logger.warning.assert_called_once()
        warning_msg = mock_logger.warning.call_args[0][0]
        assert "summary text as reasoning content" in warning_msg

    def test_neither_content_nor_summary(self):
        """When neither content nor summary is present, reasoning is empty."""
        reasoning_item = self._reasoning_item(
            "reasoning_5",
            summaries=[],
            contents=None,
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == ""

    def test_encrypted_content_raises(self):
        """Encrypted content should still raise ValueError."""
        reasoning_item = self._reasoning_item(
            "reasoning_6",
            summaries=["Some summary"],
            contents=["Some content"],
            encrypted="ENCRYPTED",
        )
        with pytest.raises(ValueError):
            _construct_single_message_from_response_item(reasoning_item)

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_summary_with_multiple_entries_uses_first(self, mock_logger):
        """When multiple summary entries exist, the first one is used."""
        reasoning_item = self._reasoning_item(
            "reasoning_7",
            summaries=["First summary", "Second summary"],
            contents=None,
        )
        result = _construct_single_message_from_response_item(reasoning_item)
        assert result["reasoning"] == "First summary"
        mock_logger.warning.assert_called_once()
        warning_msg = mock_logger.warning.call_args[0][0]
        assert "summary text as reasoning content" in warning_msg

    @patch("vllm.entrypoints.openai.responses.utils.logger")
    def test_no_warning_when_content_used(self, mock_logger):
        """No warning should be emitted when content is available."""
        reasoning_item = self._reasoning_item(
            "reasoning_8",
            summaries=["Summary text"],
            contents=["Content text"],
        )
        _construct_single_message_from_response_item(reasoning_item)
        mock_logger.warning.assert_not_called()
|
||||
|
||||
|
||||
class TestShouldContinueFinalMessage:
|
||||
"""Tests for should_continue_final_message function.
|
||||
|
||||
|
||||
@@ -24,6 +24,9 @@ from vllm import envs
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionMessageParam
|
||||
from vllm.entrypoints.openai.responses.protocol import ResponseInputOutputItem
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def should_continue_final_message(
|
||||
@@ -191,10 +194,16 @@ def _construct_single_message_from_response_item(
|
||||
reasoning_content = ""
|
||||
if item.encrypted_content:
|
||||
raise ValueError("Encrypted content is not supported.")
|
||||
if len(item.summary) == 1:
|
||||
reasoning_content = item.summary[0].text
|
||||
elif item.content and len(item.content) == 1:
|
||||
elif item.content and len(item.content) >= 1:
|
||||
reasoning_content = item.content[0].text
|
||||
elif len(item.summary) >= 1:
|
||||
reasoning_content = item.summary[0].text
|
||||
logger.warning(
|
||||
"Using summary text as reasoning content for item %s. "
|
||||
"Please use content instead of summary for "
|
||||
"reasoning items.",
|
||||
item.id,
|
||||
)
|
||||
return {
|
||||
"role": "assistant",
|
||||
"reasoning": reasoning_content,
|
||||
|
||||
Reference in New Issue
Block a user