[responseAPI] support partial message generation (#32100)

Signed-off-by: Andrew Xia <axia@fb.com>
Signed-off-by: Andrew Xia <mitandrewxia@gmail.com>
Signed-off-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
Co-authored-by: Andrew Xia <axia@fb.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
This commit is contained in:
Andrew Xia
2026-01-13 13:41:26 -05:00
committed by GitHub
parent 6beef12b9b
commit af54d2e2d0
3 changed files with 337 additions and 0 deletions

View File

@@ -21,6 +21,7 @@ from vllm.entrypoints.responses_utils import (
_maybe_combine_reasoning_and_tool_call,
construct_chat_messages_with_tool_call,
convert_tool_responses_to_completions_format,
should_continue_final_message,
)
@@ -165,6 +166,285 @@ class TestResponsesUtils:
assert formatted_item["content"] == "dongyi"
class TestShouldContinueFinalMessage:
"""Tests for should_continue_final_message function.
This function enables Anthropic-style partial message completion, where
users can provide an incomplete assistant message and have the model
continue from where it left off.
"""
def test_string_input_returns_false(self):
"""String input is always a user message, so should not continue."""
assert should_continue_final_message("Hello, world!") is False
def test_empty_list_returns_false(self):
"""Empty list should not continue."""
assert should_continue_final_message([]) is False
def test_completed_message_returns_false(self):
"""Completed message should not be continued."""
output_item = ResponseOutputMessage(
id="msg_123",
content=[
ResponseOutputText(
annotations=[],
text="The answer is 42.",
type="output_text",
logprobs=None,
)
],
role="assistant",
status="completed",
type="message",
)
assert should_continue_final_message([output_item]) is False
def test_in_progress_message_returns_true(self):
"""In-progress message should be continued.
This is the key use case for partial message completion.
Example: The user provides "The best answer is (" and wants
the model to continue from there.
"""
output_item = ResponseOutputMessage(
id="msg_123",
content=[
ResponseOutputText(
annotations=[],
text="The best answer is (",
type="output_text",
logprobs=None,
)
],
role="assistant",
status="in_progress",
type="message",
)
assert should_continue_final_message([output_item]) is True
def test_incomplete_message_returns_true(self):
"""Incomplete message should be continued."""
output_item = ResponseOutputMessage(
id="msg_123",
content=[
ResponseOutputText(
annotations=[],
text="The answer",
type="output_text",
logprobs=None,
)
],
role="assistant",
status="incomplete",
type="message",
)
assert should_continue_final_message([output_item]) is True
def test_in_progress_reasoning_returns_true(self):
"""In-progress reasoning should be continued."""
reasoning_item = ResponseReasoningItem(
id="reasoning_123",
summary=[],
type="reasoning",
content=[
Content(
text="Let me think about this...",
type="reasoning_text",
)
],
encrypted_content=None,
status="in_progress",
)
assert should_continue_final_message([reasoning_item]) is True
def test_incomplete_reasoning_returns_true(self):
"""Incomplete reasoning should be continued."""
reasoning_item = ResponseReasoningItem(
id="reasoning_123",
summary=[],
type="reasoning",
content=[
Content(
text="Let me think",
type="reasoning_text",
)
],
encrypted_content=None,
status="incomplete",
)
assert should_continue_final_message([reasoning_item]) is True
reasoning_item = {
"id": "reasoning_123",
"summary": [],
"type": "reasoning",
"content": [],
"status": "incomplete",
}
assert should_continue_final_message([reasoning_item]) is True
def test_completed_reasoning_returns_false(self):
"""Completed reasoning should not be continued."""
reasoning_item = ResponseReasoningItem(
id="reasoning_123",
summary=[],
type="reasoning",
content=[
Content(
text="I have thought about this.",
type="reasoning_text",
)
],
encrypted_content=None,
status="completed",
)
assert should_continue_final_message([reasoning_item]) is False
def test_reasoning_with_none_status_returns_false(self):
"""Reasoning with None status should not be continued."""
reasoning_item = ResponseReasoningItem(
id="reasoning_123",
summary=[],
type="reasoning",
content=[
Content(
text="Some reasoning",
type="reasoning_text",
)
],
encrypted_content=None,
status=None,
)
assert should_continue_final_message([reasoning_item]) is False
def test_only_last_item_matters(self):
"""Only the last item in the list determines continuation."""
completed_item = ResponseOutputMessage(
id="msg_1",
content=[
ResponseOutputText(
annotations=[],
text="Complete message.",
type="output_text",
logprobs=None,
)
],
role="assistant",
status="completed",
type="message",
)
in_progress_item = ResponseOutputMessage(
id="msg_2",
content=[
ResponseOutputText(
annotations=[],
text="Partial message...",
type="output_text",
logprobs=None,
)
],
role="assistant",
status="in_progress",
type="message",
)
# In-progress as last item -> should continue
assert should_continue_final_message([completed_item, in_progress_item]) is True
# Completed as last item -> should not continue
assert (
should_continue_final_message([in_progress_item, completed_item]) is False
)
def test_tool_call_returns_false(self):
"""Tool calls should not trigger continuation."""
tool_call = ResponseFunctionToolCall(
id="fc_123",
call_id="call_123",
type="function_call",
status="in_progress",
name="get_weather",
arguments='{"location": "NYC"}',
)
assert should_continue_final_message([tool_call]) is False
tool_call = {
"id": "msg_123",
"call_id": "call_123",
"type": "function_call",
"status": "in_progress",
"name": "get_weather",
"arguments": '{"location": "NYC"}',
}
assert should_continue_final_message([tool_call]) is False
# Tests for dict inputs (e.g., from curl requests)
def test_dict_in_progress_message_returns_true(self):
"""Dict with in_progress status should be continued (curl input)."""
dict_item = {
"id": "msg_123",
"type": "message",
"role": "assistant",
"status": "in_progress",
"content": [{"type": "output_text", "text": "The answer is ("}],
}
assert should_continue_final_message([dict_item]) is True
def test_dict_incomplete_message_returns_true(self):
"""Dict with incomplete status should be continued (curl input)."""
dict_item = {
"id": "msg_123",
"type": "message",
"role": "assistant",
"status": "incomplete",
"content": [{"type": "output_text", "text": "Partial answer"}],
}
assert should_continue_final_message([dict_item]) is True
def test_dict_completed_message_returns_false(self):
"""Dict with completed status should not be continued (curl input)."""
dict_item = {
"id": "msg_123",
"type": "message",
"role": "assistant",
"status": "completed",
"content": [{"type": "output_text", "text": "Complete answer."}],
}
assert should_continue_final_message([dict_item]) is False
def test_dict_reasoning_in_progress_returns_true(self):
"""Dict reasoning item with in_progress status should be continued."""
dict_item = {
"id": "reasoning_123",
"type": "reasoning",
"status": "in_progress",
"content": [{"type": "reasoning_text", "text": "Let me think..."}],
}
assert should_continue_final_message([dict_item]) is True
def test_dict_without_status_returns_false(self):
"""Dict without status field should not be continued."""
dict_item = {
"id": "msg_123",
"type": "message",
"role": "assistant",
"content": [{"type": "output_text", "text": "Some text"}],
}
assert should_continue_final_message([dict_item]) is False
def test_dict_with_none_status_returns_false(self):
"""Dict with None status should not be continued."""
dict_item = {
"id": "msg_123",
"type": "message",
"role": "assistant",
"status": None,
"content": [{"type": "output_text", "text": "Some text"}],
}
assert should_continue_final_message([dict_item]) is False
class TestMaybeCombineReasoningAndToolCall:
"""Tests for _maybe_combine_reasoning_and_tool_call function."""