[responseAPI] support partial message generation (#32100)
Signed-off-by: Andrew Xia <axia@fb.com> Signed-off-by: Andrew Xia <mitandrewxia@gmail.com> Signed-off-by: Lu Fang <30275821+houseroad@users.noreply.github.com> Co-authored-by: Andrew Xia <axia@fb.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
This commit is contained in:
@@ -21,6 +21,7 @@ from vllm.entrypoints.responses_utils import (
|
||||
_maybe_combine_reasoning_and_tool_call,
|
||||
construct_chat_messages_with_tool_call,
|
||||
convert_tool_responses_to_completions_format,
|
||||
should_continue_final_message,
|
||||
)
|
||||
|
||||
|
||||
@@ -165,6 +166,285 @@ class TestResponsesUtils:
|
||||
assert formatted_item["content"] == "dongyi"
|
||||
|
||||
|
||||
class TestShouldContinueFinalMessage:
    """Unit tests covering ``should_continue_final_message``.

    The function under test supports Anthropic-style partial message
    completion: a caller hands over an unfinished assistant message and the
    model picks up generation from the point where that message stops.
    """

    @staticmethod
    def _message(status, text, msg_id="msg_123"):
        """Build an assistant ``ResponseOutputMessage`` with one text part."""
        text_part = ResponseOutputText(
            annotations=[],
            text=text,
            type="output_text",
            logprobs=None,
        )
        return ResponseOutputMessage(
            id=msg_id,
            content=[text_part],
            role="assistant",
            status=status,
            type="message",
        )

    @staticmethod
    def _reasoning(status, text):
        """Build a ``ResponseReasoningItem`` with one reasoning-text part."""
        thought = Content(text=text, type="reasoning_text")
        return ResponseReasoningItem(
            id="reasoning_123",
            summary=[],
            type="reasoning",
            content=[thought],
            encrypted_content=None,
            status=status,
        )

    def test_string_input_returns_false(self):
        """A plain string is a user message, so nothing is continued."""
        prompt = "Hello, world!"
        assert should_continue_final_message(prompt) is False

    def test_empty_list_returns_false(self):
        """An empty item list gives nothing to continue."""
        no_items = []
        assert should_continue_final_message(no_items) is False

    def test_completed_message_returns_false(self):
        """A message marked completed is left alone."""
        finished = self._message("completed", "The answer is 42.")
        assert should_continue_final_message([finished]) is False

    def test_in_progress_message_returns_true(self):
        """A message marked in_progress is continued.

        This is the central partial-completion scenario: the caller sends
        "The best answer is (" and expects the model to carry on from it.
        """
        partial = self._message("in_progress", "The best answer is (")
        assert should_continue_final_message([partial]) is True

    def test_incomplete_message_returns_true(self):
        """A message marked incomplete is continued."""
        partial = self._message("incomplete", "The answer")
        assert should_continue_final_message([partial]) is True

    def test_in_progress_reasoning_returns_true(self):
        """A reasoning item marked in_progress is continued."""
        thinking = self._reasoning("in_progress", "Let me think about this...")
        assert should_continue_final_message([thinking]) is True

    def test_incomplete_reasoning_returns_true(self):
        """A reasoning item marked incomplete is continued."""
        # Typed item carrying some reasoning text.
        typed_item = self._reasoning("incomplete", "Let me think")
        assert should_continue_final_message([typed_item]) is True

        # Same status supplied as a plain dict with empty content.
        raw_item = {
            "id": "reasoning_123",
            "summary": [],
            "type": "reasoning",
            "content": [],
            "status": "incomplete",
        }
        assert should_continue_final_message([raw_item]) is True

    def test_completed_reasoning_returns_false(self):
        """A reasoning item marked completed is left alone."""
        done_thinking = self._reasoning("completed", "I have thought about this.")
        assert should_continue_final_message([done_thinking]) is False

    def test_reasoning_with_none_status_returns_false(self):
        """A reasoning item whose status is None is left alone."""
        unmarked = self._reasoning(None, "Some reasoning")
        assert should_continue_final_message([unmarked]) is False

    def test_only_last_item_matters(self):
        """Continuation is decided by the final list element alone."""
        finished = self._message("completed", "Complete message.", msg_id="msg_1")
        partial = self._message("in_progress", "Partial message...", msg_id="msg_2")

        # Partial item in last position -> continue.
        ends_with_partial = [finished, partial]
        assert should_continue_final_message(ends_with_partial) is True

        # Finished item in last position -> do not continue.
        ends_with_finished = [partial, finished]
        assert should_continue_final_message(ends_with_finished) is False

    def test_tool_call_returns_false(self):
        """A function tool call never triggers continuation."""
        # Typed tool call, even though it is marked in_progress.
        typed_call = ResponseFunctionToolCall(
            id="fc_123",
            call_id="call_123",
            type="function_call",
            status="in_progress",
            name="get_weather",
            arguments='{"location": "NYC"}',
        )
        assert should_continue_final_message([typed_call]) is False

        # Equivalent tool call expressed as a plain dict.
        raw_call = {
            "id": "msg_123",
            "call_id": "call_123",
            "type": "function_call",
            "status": "in_progress",
            "name": "get_weather",
            "arguments": '{"location": "NYC"}',
        }
        assert should_continue_final_message([raw_call]) is False

    # Tests for dict inputs (e.g., from curl requests)
    def test_dict_in_progress_message_returns_true(self):
        """A dict message with in_progress status is continued (curl input)."""
        payload = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "status": "in_progress",
            "content": [{"type": "output_text", "text": "The answer is ("}],
        }
        assert should_continue_final_message([payload]) is True

    def test_dict_incomplete_message_returns_true(self):
        """A dict message with incomplete status is continued (curl input)."""
        payload = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "status": "incomplete",
            "content": [{"type": "output_text", "text": "Partial answer"}],
        }
        assert should_continue_final_message([payload]) is True

    def test_dict_completed_message_returns_false(self):
        """A dict message with completed status is left alone (curl input)."""
        payload = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "status": "completed",
            "content": [{"type": "output_text", "text": "Complete answer."}],
        }
        assert should_continue_final_message([payload]) is False

    def test_dict_reasoning_in_progress_returns_true(self):
        """A dict reasoning item with in_progress status is continued."""
        payload = {
            "id": "reasoning_123",
            "type": "reasoning",
            "status": "in_progress",
            "content": [{"type": "reasoning_text", "text": "Let me think..."}],
        }
        assert should_continue_final_message([payload]) is True

    def test_dict_without_status_returns_false(self):
        """A dict message lacking a status key is left alone."""
        payload = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "Some text"}],
        }
        assert should_continue_final_message([payload]) is False

    def test_dict_with_none_status_returns_false(self):
        """A dict message whose status is None is left alone."""
        payload = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "status": None,
            "content": [{"type": "output_text", "text": "Some text"}],
        }
        assert should_continue_final_message([payload]) is False
|
||||
|
||||
|
||||
class TestMaybeCombineReasoningAndToolCall:
|
||||
"""Tests for _maybe_combine_reasoning_and_tool_call function."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user