[responseAPI] support partial message generation (#32100)

Signed-off-by: Andrew Xia <axia@fb.com>
Signed-off-by: Andrew Xia <mitandrewxia@gmail.com>
Signed-off-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
Co-authored-by: Andrew Xia <axia@fb.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
2026-01-13 13:41:26 -05:00
parent 6beef12b9b
commit af54d2e2d0
3 changed files with 337 additions and 0 deletions
--- a/tests/entrypoints/test_responses_utils.py
+++ b/tests/entrypoints/test_responses_utils.py
@@ -21,6 +21,7 @@ from vllm.entrypoints.responses_utils import (
    _maybe_combine_reasoning_and_tool_call,
    construct_chat_messages_with_tool_call,
    convert_tool_responses_to_completions_format,
+    should_continue_final_message,
 )


@@ -165,6 +166,285 @@ class TestResponsesUtils:
        assert formatted_item["content"] == "dongyi"


+class TestShouldContinueFinalMessage:
+    """Tests for should_continue_final_message function.
+
+    This function enables Anthropic-style partial message completion, where
+    users can provide an incomplete assistant message and have the model
+    continue from where it left off.
+    """
+
+    def test_string_input_returns_false(self):
+        """String input is always a user message, so it should not be continued."""
+        assert should_continue_final_message("Hello, world!") is False
+
+    def test_empty_list_returns_false(self):
+        """Empty list should not be continued."""
+        assert should_continue_final_message([]) is False
+
+    def test_completed_message_returns_false(self):
+        """Completed message should not be continued."""
+        output_item = ResponseOutputMessage(
+            id="msg_123",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="The answer is 42.",
+                    type="output_text",
+                    logprobs=None,
+                )
+            ],
+            role="assistant",
+            status="completed",
+            type="message",
+        )
+        assert should_continue_final_message([output_item]) is False
+
+    def test_in_progress_message_returns_true(self):
+        """In-progress message should be continued.
+
+        This is the key use case for partial message completion.
+        Example: The user provides "The best answer is (" and wants
+        the model to continue from there.
+        """
+        output_item = ResponseOutputMessage(
+            id="msg_123",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="The best answer is (",
+                    type="output_text",
+                    logprobs=None,
+                )
+            ],
+            role="assistant",
+            status="in_progress",
+            type="message",
+        )
+        assert should_continue_final_message([output_item]) is True
+
+    def test_incomplete_message_returns_true(self):
+        """Incomplete message should be continued."""
+        output_item = ResponseOutputMessage(
+            id="msg_123",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="The answer",
+                    type="output_text",
+                    logprobs=None,
+                )
+            ],
+            role="assistant",
+            status="incomplete",
+            type="message",
+        )
+        assert should_continue_final_message([output_item]) is True
+
+    def test_in_progress_reasoning_returns_true(self):
+        """In-progress reasoning should be continued."""
+        reasoning_item = ResponseReasoningItem(
+            id="reasoning_123",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="Let me think about this...",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status="in_progress",
+        )
+        assert should_continue_final_message([reasoning_item]) is True
+
+    def test_incomplete_reasoning_returns_true(self):
+        """Incomplete reasoning (typed item or raw dict) should be continued."""
+        reasoning_item = ResponseReasoningItem(
+            id="reasoning_123",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="Let me think",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status="incomplete",
+        )
+        assert should_continue_final_message([reasoning_item]) is True
+
+        reasoning_item = {
+            "id": "reasoning_123",
+            "summary": [],
+            "type": "reasoning",
+            "content": [],
+            "status": "incomplete",
+        }
+        assert should_continue_final_message([reasoning_item]) is True
+
+    def test_completed_reasoning_returns_false(self):
+        """Completed reasoning should not be continued."""
+        reasoning_item = ResponseReasoningItem(
+            id="reasoning_123",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="I have thought about this.",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status="completed",
+        )
+        assert should_continue_final_message([reasoning_item]) is False
+
+    def test_reasoning_with_none_status_returns_false(self):
+        """Reasoning with None status should not be continued."""
+        reasoning_item = ResponseReasoningItem(
+            id="reasoning_123",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="Some reasoning",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status=None,
+        )
+        assert should_continue_final_message([reasoning_item]) is False
+
+    def test_only_last_item_matters(self):
+        """Only the last item in the list determines continuation."""
+        completed_item = ResponseOutputMessage(
+            id="msg_1",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="Complete message.",
+                    type="output_text",
+                    logprobs=None,
+                )
+            ],
+            role="assistant",
+            status="completed",
+            type="message",
+        )
+        in_progress_item = ResponseOutputMessage(
+            id="msg_2",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="Partial message...",
+                    type="output_text",
+                    logprobs=None,
+                )
+            ],
+            role="assistant",
+            status="in_progress",
+            type="message",
+        )
+
+        # In-progress as last item -> should continue
+        assert should_continue_final_message([completed_item, in_progress_item]) is True
+
+        # Completed as last item -> should not continue
+        assert (
+            should_continue_final_message([in_progress_item, completed_item]) is False
+        )
+
+    def test_tool_call_returns_false(self):
+        """Tool calls should not trigger continuation, even when in_progress."""
+        tool_call = ResponseFunctionToolCall(
+            id="fc_123",
+            call_id="call_123",
+            type="function_call",
+            status="in_progress",
+            name="get_weather",
+            arguments='{"location": "NYC"}',
+        )
+        assert should_continue_final_message([tool_call]) is False
+
+        tool_call = {
+            "id": "msg_123",
+            "call_id": "call_123",
+            "type": "function_call",
+            "status": "in_progress",
+            "name": "get_weather",
+            "arguments": '{"location": "NYC"}',
+        }
+        assert should_continue_final_message([tool_call]) is False
+
+    # Tests for dict inputs (e.g., from curl requests)
+    def test_dict_in_progress_message_returns_true(self):
+        """Dict with in_progress status should be continued (curl input)."""
+        dict_item = {
+            "id": "msg_123",
+            "type": "message",
+            "role": "assistant",
+            "status": "in_progress",
+            "content": [{"type": "output_text", "text": "The answer is ("}],
+        }
+        assert should_continue_final_message([dict_item]) is True
+
+    def test_dict_incomplete_message_returns_true(self):
+        """Dict with incomplete status should be continued (curl input)."""
+        dict_item = {
+            "id": "msg_123",
+            "type": "message",
+            "role": "assistant",
+            "status": "incomplete",
+            "content": [{"type": "output_text", "text": "Partial answer"}],
+        }
+        assert should_continue_final_message([dict_item]) is True
+
+    def test_dict_completed_message_returns_false(self):
+        """Dict with completed status should not be continued (curl input)."""
+        dict_item = {
+            "id": "msg_123",
+            "type": "message",
+            "role": "assistant",
+            "status": "completed",
+            "content": [{"type": "output_text", "text": "Complete answer."}],
+        }
+        assert should_continue_final_message([dict_item]) is False
+
+    def test_dict_reasoning_in_progress_returns_true(self):
+        """Dict reasoning item with in_progress status should be continued."""
+        dict_item = {
+            "id": "reasoning_123",
+            "type": "reasoning",
+            "status": "in_progress",
+            "content": [{"type": "reasoning_text", "text": "Let me think..."}],
+        }
+        assert should_continue_final_message([dict_item]) is True
+
+    def test_dict_without_status_returns_false(self):
+        """Dict without status field should not be continued."""
+        dict_item = {
+            "id": "msg_123",
+            "type": "message",
+            "role": "assistant",
+            "content": [{"type": "output_text", "text": "Some text"}],
+        }
+        assert should_continue_final_message([dict_item]) is False
+
+    def test_dict_with_none_status_returns_false(self):
+        """Dict with None status should not be continued."""
+        dict_item = {
+            "id": "msg_123",
+            "type": "message",
+            "role": "assistant",
+            "status": None,
+            "content": [{"type": "output_text", "text": "Some text"}],
+        }
+        assert should_continue_final_message([dict_item]) is False
+
+
 class TestMaybeCombineReasoningAndToolCall:
    """Tests for _maybe_combine_reasoning_and_tool_call function."""