[Misc][Harmony] Move Responses API only harmony utils to responses/harmony.py (#35339)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
2026-02-26 01:35:16 -05:00
parent 4a9c07a0a2
commit 186ea22efe
6 changed files with 1040 additions and 990 deletions
--- a/tests/entrypoints/openai/parser/test_harmony_utils.py
+++ b/tests/entrypoints/openai/parser/test_harmony_utils.py
@@ -2,13 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import pytest
-from openai.types.responses import (
-    ResponseFunctionToolCall,
-    ResponseOutputMessage,
-    ResponseReasoningItem,
-)
-from openai.types.responses.response_output_item import McpCall
-from openai_harmony import Author, Message, Role, TextContent
+from openai_harmony import Message, Role

 from tests.entrypoints.openai.utils import verify_harmony_messages
 from vllm.entrypoints.openai.parser.harmony_utils import (
@@ -18,20 +12,21 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
    has_custom_tools,
    parse_chat_input_to_harmony_message,
    parse_chat_output,
-    parse_input_to_harmony_message,
-    parse_output_message,
+)
+from vllm.entrypoints.openai.responses.harmony import (
+    response_previous_input_to_harmony,
 )


 class TestCommonParseInputToHarmonyMessage:
    """
    Tests for scenarios that are common to both Chat Completion
-    parse_chat_input_to_harmony_message and Responsees API
-    parse_input_to_harmony_message functions.
+    parse_chat_input_to_harmony_message and Responses API
+    response_previous_input_to_harmony functions.
    """

    @pytest.fixture(
-        params=[parse_chat_input_to_harmony_message, parse_input_to_harmony_message]
+        params=[parse_chat_input_to_harmony_message, response_previous_input_to_harmony]
    )
    def parse_function(self, request):
        return request.param
@@ -216,81 +211,6 @@ class TestCommonParseInputToHarmonyMessage:
        assert messages[0].content[1].text == "actual text"


-class TestParseInputToHarmonyMessage:
-    """
-    Tests for scenarios that are specific to the Responses API
-    parse_input_to_harmony_message function.
-    """
-
-    def test_message_with_empty_content(self):
-        """Test parsing message with empty string content."""
-        chat_msg = {
-            "role": "user",
-            "content": "",
-        }
-
-        messages = parse_input_to_harmony_message(chat_msg)
-
-        assert len(messages) == 1
-        assert messages[0].content[0].text == ""
-
-    def test_tool_message_with_string_content(self):
-        """Test parsing tool message with string content."""
-        chat_msg = {
-            "role": "tool",
-            "name": "get_weather",
-            "content": "The weather in San Francisco is sunny, 72°F",
-        }
-
-        messages = parse_input_to_harmony_message(chat_msg)
-
-        assert len(messages) == 1
-        assert messages[0].author.role == Role.TOOL
-        assert messages[0].author.name == "functions.get_weather"
-        assert (
-            messages[0].content[0].text == "The weather in San Francisco is sunny, 72°F"
-        )
-        assert messages[0].channel == "commentary"
-
-    def test_tool_message_with_array_content(self):
-        """Test parsing tool message with array content."""
-        chat_msg = {
-            "role": "tool",
-            "name": "search_results",
-            "content": [
-                {"type": "text", "text": "Result 1: "},
-                {"type": "text", "text": "Result 2: "},
-                {
-                    "type": "image",
-                    "url": "http://example.com/img.png",
-                },  # Should be ignored
-                {"type": "text", "text": "Result 3"},
-            ],
-        }
-
-        messages = parse_input_to_harmony_message(chat_msg)
-
-        assert len(messages) == 1
-        assert messages[0].author.role == Role.TOOL
-        assert messages[0].author.name == "functions.search_results"
-        assert messages[0].content[0].text == "Result 1: Result 2: Result 3"
-
-    def test_tool_message_with_empty_content(self):
-        """Test parsing tool message with None content."""
-        chat_msg = {
-            "role": "tool",
-            "name": "empty_tool",
-            "content": None,
-        }
-
-        messages = parse_input_to_harmony_message(chat_msg)
-
-        assert len(messages) == 1
-        assert messages[0].author.role == Role.TOOL
-        assert messages[0].author.name == "functions.empty_tool"
-        assert messages[0].content[0].text == ""
-
-
 class TestParseChatInputToHarmonyMessage:
    """
    Tests for scenarios that are specific to the Chat Completion API
@@ -888,200 +808,6 @@ class TestParseChatOutput:
        assert final_content == "Let me look that up.\nThe answer is 42."


-class TestParseOutputMessage:
-    """Tests for parse_output_message function."""
-
-    def test_commentary_with_no_recipient_creates_message(self):
-        """Test that commentary with recipient=None (preambles) creates message items.
-
-        Per Harmony format, preambles are intended to be shown to end-users,
-        unlike analysis channel content which is hidden reasoning.
-        See: https://cookbook.openai.com/articles/openai-harmony
-        """
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, "I will now search for the weather information."
-        )
-        message = message.with_channel("commentary")
-        # recipient is None by default, representing a preamble
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseOutputMessage)
-        assert output_items[0].type == "message"
-        assert output_items[0].role == "assistant"
-        assert output_items[0].status == "completed"
-        assert len(output_items[0].content) == 1
-        assert output_items[0].content[0].type == "output_text"
-        assert (
-            output_items[0].content[0].text
-            == "I will now search for the weather information."
-        )
-
-    def test_commentary_with_function_recipient_creates_function_call(self):
-        """Test commentary with recipient='functions.X' creates function calls."""
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}'
-        )
-        message = message.with_channel("commentary")
-        message = message.with_recipient("functions.get_weather")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseFunctionToolCall)
-        assert output_items[0].type == "function_call"
-        assert output_items[0].name == "get_weather"
-        assert (
-            output_items[0].arguments
-            == '{"location": "San Francisco", "units": "celsius"}'
-        )
-        assert output_items[0].call_id.startswith("call_")
-        assert output_items[0].id.startswith("fc_")
-
-    def test_commentary_with_python_recipient_creates_reasoning(self):
-        """Test that commentary with recipient='python' creates reasoning items."""
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))"
-        )
-        message = message.with_channel("commentary")
-        message = message.with_recipient("python")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
-        assert output_items[0].type == "reasoning"
-        assert (
-            output_items[0].content[0].text
-            == "import numpy as np\nprint(np.array([1, 2, 3]))"
-        )
-
-    def test_commentary_with_browser_recipient_creates_reasoning(self):
-        """Test that commentary with recipient='browser' creates reasoning items."""
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, "Navigating to the specified URL"
-        )
-        message = message.with_channel("commentary")
-        message = message.with_recipient("browser")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
-        assert output_items[0].type == "reasoning"
-        assert output_items[0].content[0].text == "Navigating to the specified URL"
-
-    def test_commentary_with_container_recipient_creates_reasoning(self):
-        """Test that commentary with recipient='container' creates reasoning items."""
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, "Running command in container"
-        )
-        message = message.with_channel("commentary")
-        message = message.with_recipient("container")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
-        assert output_items[0].type == "reasoning"
-        assert output_items[0].content[0].text == "Running command in container"
-
-    def test_commentary_with_empty_content_and_no_recipient(self):
-        """Test edge case: empty commentary with recipient=None."""
-        message = Message.from_role_and_content(Role.ASSISTANT, "")
-        message = message.with_channel("commentary")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseOutputMessage)
-        assert output_items[0].content[0].text == ""
-
-    def test_commentary_with_multiple_contents_and_no_recipient(self):
-        """Test multiple content items in commentary with no recipient."""
-        contents = [
-            TextContent(text="Step 1: Analyze the request"),
-            TextContent(text="Step 2: Prepare to call functions"),
-        ]
-        message = Message.from_role_and_contents(Role.ASSISTANT, contents)
-        message = message.with_channel("commentary")
-
-        output_items = parse_output_message(message)
-
-        # _parse_final_message returns single ResponseOutputMessage with
-        # multiple contents
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseOutputMessage)
-        assert len(output_items[0].content) == 2
-        assert output_items[0].content[0].text == "Step 1: Analyze the request"
-        assert output_items[0].content[1].text == "Step 2: Prepare to call functions"
-
-    def test_commentary_with_multiple_function_calls(self):
-        """Test multiple function calls in commentary channel."""
-        contents = [
-            TextContent(text='{"location": "San Francisco"}'),
-            TextContent(text='{"location": "New York"}'),
-        ]
-        message = Message.from_role_and_contents(Role.ASSISTANT, contents)
-        message = message.with_channel("commentary")
-        message = message.with_recipient("functions.get_weather")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 2
-        assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items)
-        assert output_items[0].name == "get_weather"
-        assert output_items[1].name == "get_weather"
-        assert output_items[0].arguments == '{"location": "San Francisco"}'
-        assert output_items[1].arguments == '{"location": "New York"}'
-
-    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
-        """Test that commentary with unknown recipient creates MCP call."""
-        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
-        message = message.with_channel("commentary")
-        message = message.with_recipient("custom_tool")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], McpCall)
-        assert output_items[0].type == "mcp_call"
-        assert output_items[0].name == "custom_tool"
-        assert output_items[0].server_label == "custom_tool"
-
-    def test_analysis_channel_creates_reasoning(self):
-        """Test that analysis channel creates reasoning items."""
-        message = Message.from_role_and_content(
-            Role.ASSISTANT, "Analyzing the problem step by step..."
-        )
-        message = message.with_channel("analysis")
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
-        assert output_items[0].type == "reasoning"
-        assert (
-            output_items[0].content[0].text == "Analyzing the problem step by step..."
-        )
-
-    def test_non_assistant_message_returns_empty(self):
-        """Test that non-assistant messages return empty list.
-
-        Per the implementation, tool messages to assistant (e.g., search results)
-        are not included in final output to align with OpenAI behavior.
-        """
-        message = Message.from_author_and_content(
-            Author.new(Role.TOOL, "functions.get_weather"),
-            "The weather is sunny, 72°F",
-        )
-
-        output_items = parse_output_message(message)
-
-        assert len(output_items) == 0
-
-
 def test_has_custom_tools() -> None:
    assert not has_custom_tools(set())
    assert not has_custom_tools({"web_search_preview", "code_interpreter", "container"})
@@ -1091,185 +817,6 @@ def test_has_custom_tools() -> None:
    )


-def test_parse_mcp_call_basic() -> None:
-    """Test that MCP calls are parsed with correct type and server_label."""
-    message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
-    message = message.with_recipient("filesystem")
-    message = message.with_channel("commentary")
-
-    output_items = parse_output_message(message)
-
-    assert len(output_items) == 1
-    assert isinstance(output_items[0], McpCall)
-    assert output_items[0].type == "mcp_call"
-    assert output_items[0].name == "filesystem"
-    assert output_items[0].server_label == "filesystem"
-    assert output_items[0].arguments == '{"path": "/tmp"}'
-    assert output_items[0].status == "completed"
-
-
-def test_parse_mcp_call_dotted_recipient() -> None:
-    """Test that dotted recipients extract the tool name correctly."""
-    message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
-    message = message.with_recipient("repo_browser.list")
-    message = message.with_channel("commentary")
-
-    output_items = parse_output_message(message)
-
-    assert len(output_items) == 1
-    assert isinstance(output_items[0], McpCall)
-    assert output_items[0].name == "list"
-    assert output_items[0].server_label == "repo_browser"
-
-
-def test_mcp_vs_function_call() -> None:
-    """Test that function calls are not parsed as MCP calls."""
-    func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
-    func_message = func_message.with_recipient("functions.my_tool")
-    func_message = func_message.with_channel("commentary")
-
-    func_items = parse_output_message(func_message)
-
-    assert len(func_items) == 1
-    assert not isinstance(func_items[0], McpCall)
-    assert func_items[0].type == "function_call"
-
-
-def test_mcp_vs_builtin_tools() -> None:
-    """Test that built-in tools (python, container) are not parsed as MCP calls."""
-    # Test python (built-in tool) - should be reasoning, not MCP
-    python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
-    python_message = python_message.with_recipient("python")
-    python_message = python_message.with_channel("commentary")
-
-    python_items = parse_output_message(python_message)
-
-    assert len(python_items) == 1
-    assert not isinstance(python_items[0], McpCall)
-    assert python_items[0].type == "reasoning"
-
-
-def test_parse_remaining_state_commentary_channel() -> None:
-    """Test parse_remaining_state with commentary channel and various recipients."""
-    from unittest.mock import Mock
-
-    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state
-
-    # Test 1: functions.* recipient → should return function tool call
-    parser_func = Mock()
-    parser_func.current_content = '{"arg": "value"}'
-    parser_func.current_role = Role.ASSISTANT
-    parser_func.current_channel = "commentary"
-    parser_func.current_recipient = "functions.my_tool"
-
-    func_items = parse_remaining_state(parser_func)
-
-    assert len(func_items) == 1
-    assert not isinstance(func_items[0], McpCall)
-    assert func_items[0].type == "function_call"
-    assert func_items[0].name == "my_tool"
-    assert func_items[0].status == "in_progress"
-
-    # Test 2: MCP tool (not builtin) → should return MCP call
-    parser_mcp = Mock()
-    parser_mcp.current_content = '{"path": "/tmp"}'
-    parser_mcp.current_role = Role.ASSISTANT
-    parser_mcp.current_channel = "commentary"
-    parser_mcp.current_recipient = "filesystem"
-
-    mcp_items = parse_remaining_state(parser_mcp)
-
-    assert len(mcp_items) == 1
-    assert isinstance(mcp_items[0], McpCall)
-    assert mcp_items[0].type == "mcp_call"
-    assert mcp_items[0].name == "filesystem"
-    assert mcp_items[0].server_label == "filesystem"
-    assert mcp_items[0].status == "in_progress"
-
-    # Test 3: Built-in tool (python)
-    # should NOT return MCP call, returns reasoning (internal tool interaction)
-    parser_builtin = Mock()
-    parser_builtin.current_content = "print('hello')"
-    parser_builtin.current_role = Role.ASSISTANT
-    parser_builtin.current_channel = "commentary"
-    parser_builtin.current_recipient = "python"
-
-    builtin_items = parse_remaining_state(parser_builtin)
-
-    # Built-in tools explicitly return reasoning
-    assert len(builtin_items) == 1
-    assert not isinstance(builtin_items[0], McpCall)
-    assert builtin_items[0].type == "reasoning"
-
-    # Test 4: No recipient (preamble) → should return message, not reasoning
-    parser_preamble = Mock()
-    parser_preamble.current_content = "I'll search for that information now."
-    parser_preamble.current_role = Role.ASSISTANT
-    parser_preamble.current_channel = "commentary"
-    parser_preamble.current_recipient = None
-
-    preamble_items = parse_remaining_state(parser_preamble)
-
-    assert len(preamble_items) == 1
-    assert isinstance(preamble_items[0], ResponseOutputMessage)
-    assert preamble_items[0].type == "message"
-    assert preamble_items[0].content[0].text == "I'll search for that information now."
-    assert preamble_items[0].status == "incomplete"  # streaming
-
-
-def test_parse_remaining_state_analysis_channel() -> None:
-    """Test parse_remaining_state with analysis channel and various recipients."""
-    from unittest.mock import Mock
-
-    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state
-
-    # Test 1: functions.* recipient → should return function tool call
-    parser_func = Mock()
-    parser_func.current_content = '{"arg": "value"}'
-    parser_func.current_role = Role.ASSISTANT
-    parser_func.current_channel = "analysis"
-    parser_func.current_recipient = "functions.my_tool"
-
-    func_items = parse_remaining_state(parser_func)
-
-    assert len(func_items) == 1
-    assert not isinstance(func_items[0], McpCall)
-    assert func_items[0].type == "function_call"
-    assert func_items[0].name == "my_tool"
-    assert func_items[0].status == "in_progress"
-
-    # Test 2: MCP tool (not builtin) → should return MCP call
-    parser_mcp = Mock()
-    parser_mcp.current_content = '{"query": "test"}'
-    parser_mcp.current_role = Role.ASSISTANT
-    parser_mcp.current_channel = "analysis"
-    parser_mcp.current_recipient = "database"
-
-    mcp_items = parse_remaining_state(parser_mcp)
-
-    assert len(mcp_items) == 1
-    assert isinstance(mcp_items[0], McpCall)
-    assert mcp_items[0].type == "mcp_call"
-    assert mcp_items[0].name == "database"
-    assert mcp_items[0].server_label == "database"
-    assert mcp_items[0].status == "in_progress"
-
-    # Test 3: Built-in tool (container)
-    # should NOT return MCP call, falls through to reasoning
-    parser_builtin = Mock()
-    parser_builtin.current_content = "docker run"
-    parser_builtin.current_role = Role.ASSISTANT
-    parser_builtin.current_channel = "analysis"
-    parser_builtin.current_recipient = "container"
-
-    builtin_items = parse_remaining_state(parser_builtin)
-
-    # Should fall through to reasoning logic
-    assert len(builtin_items) == 1
-    assert not isinstance(builtin_items[0], McpCall)
-    assert builtin_items[0].type == "reasoning"
-
-
 class TestGetSystemMessage:
    """Tests for get_system_message channel configuration."""

--- a/tests/entrypoints/openai/responses/test_harmony_utils.py
+++ b/tests/entrypoints/openai/responses/test_harmony_utils.py
@@ -0,0 +1,463 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for vllm.entrypoints.openai.responses.harmony."""
+
+from openai.types.responses import (
+    ResponseFunctionToolCall,
+    ResponseOutputMessage,
+    ResponseReasoningItem,
+)
+from openai.types.responses.response_output_item import McpCall
+from openai_harmony import Author, Message, Role, TextContent
+
+from vllm.entrypoints.openai.responses.harmony import (
+    harmony_to_response_output,
+    parser_state_to_response_output,
+    response_previous_input_to_harmony,
+)
+
+
+class TestResponsePreviousInputToHarmony:
+    """
+    Tests for scenarios that are specific to the Responses API
+    response_previous_input_to_harmony function.
+    """
+
+    def test_message_with_empty_content(self):
+        """Test parsing message with empty string content."""
+        chat_msg = {
+            "role": "user",
+            "content": "",
+        }
+
+        messages = response_previous_input_to_harmony(chat_msg)
+
+        assert len(messages) == 1
+        assert messages[0].content[0].text == ""
+
+    def test_tool_message_with_string_content(self):
+        """Test parsing tool message with string content."""
+        chat_msg = {
+            "role": "tool",
+            "name": "get_weather",
+            "content": "The weather in San Francisco is sunny, 72°F",
+        }
+
+        messages = response_previous_input_to_harmony(chat_msg)
+
+        assert len(messages) == 1
+        assert messages[0].author.role == Role.TOOL
+        assert messages[0].author.name == "functions.get_weather"
+        assert (
+            messages[0].content[0].text == "The weather in San Francisco is sunny, 72°F"
+        )
+        assert messages[0].channel == "commentary"
+
+    def test_tool_message_with_array_content(self):
+        """Test parsing tool message with array content."""
+        chat_msg = {
+            "role": "tool",
+            "name": "search_results",
+            "content": [
+                {"type": "text", "text": "Result 1: "},
+                {"type": "text", "text": "Result 2: "},
+                {
+                    "type": "image",
+                    "url": "http://example.com/img.png",
+                },  # Should be ignored
+                {"type": "text", "text": "Result 3"},
+            ],
+        }
+
+        messages = response_previous_input_to_harmony(chat_msg)
+
+        assert len(messages) == 1
+        assert messages[0].author.role == Role.TOOL
+        assert messages[0].author.name == "functions.search_results"
+        assert messages[0].content[0].text == "Result 1: Result 2: Result 3"
+
+    def test_tool_message_with_empty_content(self):
+        """Test parsing tool message with None content."""
+        chat_msg = {
+            "role": "tool",
+            "name": "empty_tool",
+            "content": None,
+        }
+
+        messages = response_previous_input_to_harmony(chat_msg)
+
+        assert len(messages) == 1
+        assert messages[0].author.role == Role.TOOL
+        assert messages[0].author.name == "functions.empty_tool"
+        assert messages[0].content[0].text == ""
+
+
+class TestHarmonyToResponseOutput:
+    """Tests for harmony_to_response_output function."""
+
+    def test_commentary_with_no_recipient_creates_message(self):
+        """Test that commentary with recipient=None (preambles) creates message items.
+
+        Per Harmony format, preambles are intended to be shown to end-users,
+        unlike analysis channel content which is hidden reasoning.
+        See: https://cookbook.openai.com/articles/openai-harmony
+        """
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, "I will now search for the weather information."
+        )
+        message = message.with_channel("commentary")
+        # recipient is None by default, representing a preamble
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseOutputMessage)
+        assert output_items[0].type == "message"
+        assert output_items[0].role == "assistant"
+        assert output_items[0].status == "completed"
+        assert len(output_items[0].content) == 1
+        assert output_items[0].content[0].type == "output_text"
+        assert (
+            output_items[0].content[0].text
+            == "I will now search for the weather information."
+        )
+
+    def test_commentary_with_function_recipient_creates_function_call(self):
+        """Test commentary with recipient='functions.X' creates function calls."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}'
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].type == "function_call"
+        assert output_items[0].name == "get_weather"
+        assert (
+            output_items[0].arguments
+            == '{"location": "San Francisco", "units": "celsius"}'
+        )
+        assert output_items[0].call_id.startswith("call_")
+        assert output_items[0].id.startswith("fc_")
+
+    def test_commentary_with_python_recipient_creates_reasoning(self):
+        """Test that commentary with recipient='python' creates reasoning items."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))"
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("python")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseReasoningItem)
+        assert output_items[0].type == "reasoning"
+        assert (
+            output_items[0].content[0].text
+            == "import numpy as np\nprint(np.array([1, 2, 3]))"
+        )
+
+    def test_commentary_with_browser_recipient_creates_reasoning(self):
+        """Test that commentary with recipient='browser' creates reasoning items."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, "Navigating to the specified URL"
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("browser")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseReasoningItem)
+        assert output_items[0].type == "reasoning"
+        assert output_items[0].content[0].text == "Navigating to the specified URL"
+
+    def test_commentary_with_container_recipient_creates_reasoning(self):
+        """Test that commentary with recipient='container' creates reasoning items."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, "Running command in container"
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("container")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseReasoningItem)
+        assert output_items[0].type == "reasoning"
+        assert output_items[0].content[0].text == "Running command in container"
+
+    def test_commentary_with_empty_content_and_no_recipient(self):
+        """Test edge case: empty commentary with recipient=None."""
+        message = Message.from_role_and_content(Role.ASSISTANT, "")
+        message = message.with_channel("commentary")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseOutputMessage)
+        assert output_items[0].content[0].text == ""
+
+    def test_commentary_with_multiple_contents_and_no_recipient(self):
+        """Test multiple content items in commentary with no recipient."""
+        contents = [
+            TextContent(text="Step 1: Analyze the request"),
+            TextContent(text="Step 2: Prepare to call functions"),
+        ]
+        message = Message.from_role_and_contents(Role.ASSISTANT, contents)
+        message = message.with_channel("commentary")
+
+        output_items = harmony_to_response_output(message)
+
+        # _parse_final_message returns single ResponseOutputMessage with
+        # multiple contents
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseOutputMessage)
+        assert len(output_items[0].content) == 2
+        assert output_items[0].content[0].text == "Step 1: Analyze the request"
+        assert output_items[0].content[1].text == "Step 2: Prepare to call functions"
+
+    def test_commentary_with_multiple_function_calls(self):
+        """Test multiple function calls in commentary channel."""
+        contents = [
+            TextContent(text='{"location": "San Francisco"}'),
+            TextContent(text='{"location": "New York"}'),
+        ]
+        message = Message.from_role_and_contents(Role.ASSISTANT, contents)
+        message = message.with_channel("commentary")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 2
+        assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items)
+        assert output_items[0].name == "get_weather"
+        assert output_items[1].name == "get_weather"
+        assert output_items[0].arguments == '{"location": "San Francisco"}'
+        assert output_items[1].arguments == '{"location": "New York"}'
+
+    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
+        """Test that commentary with unknown recipient creates MCP call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("custom_tool")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+        assert output_items[0].type == "mcp_call"
+        assert output_items[0].name == "custom_tool"
+        assert output_items[0].server_label == "custom_tool"
+
+    def test_analysis_channel_creates_reasoning(self):
+        """Test that analysis channel creates reasoning items."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, "Analyzing the problem step by step..."
+        )
+        message = message.with_channel("analysis")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseReasoningItem)
+        assert output_items[0].type == "reasoning"
+        assert (
+            output_items[0].content[0].text == "Analyzing the problem step by step..."
+        )
+
+    def test_non_assistant_message_returns_empty(self):
+        """Test that non-assistant messages return empty list.
+
+        Per the implementation, tool messages to assistant (e.g., search results)
+        are not included in final output to align with OpenAI behavior.
+        """
+        message = Message.from_author_and_content(
+            Author.new(Role.TOOL, "functions.get_weather"),
+            "The weather is sunny, 72°F",
+        )
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 0
+
+
+def test_parse_mcp_call_basic() -> None:
+    """Test that MCP calls are parsed with correct type and server_label."""
+    message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
+    message = message.with_recipient("filesystem")
+    message = message.with_channel("commentary")
+
+    output_items = harmony_to_response_output(message)
+
+    assert len(output_items) == 1
+    assert isinstance(output_items[0], McpCall)
+    assert output_items[0].type == "mcp_call"
+    assert output_items[0].name == "filesystem"
+    assert output_items[0].server_label == "filesystem"
+    assert output_items[0].arguments == '{"path": "/tmp"}'
+    assert output_items[0].status == "completed"
+
+
+def test_parse_mcp_call_dotted_recipient() -> None:
+    """Test that dotted recipients extract the tool name correctly."""
+    message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
+    message = message.with_recipient("repo_browser.list")
+    message = message.with_channel("commentary")
+
+    output_items = harmony_to_response_output(message)
+
+    assert len(output_items) == 1
+    assert isinstance(output_items[0], McpCall)
+    assert output_items[0].name == "list"
+    assert output_items[0].server_label == "repo_browser"
+
+
+def test_mcp_vs_function_call() -> None:
+    """Test that function calls are not parsed as MCP calls."""
+    func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+    func_message = func_message.with_recipient("functions.my_tool")
+    func_message = func_message.with_channel("commentary")
+
+    func_items = harmony_to_response_output(func_message)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+
+
+def test_mcp_vs_builtin_tools() -> None:
+    """Test that built-in tools (python, container) are not parsed as MCP calls."""
+    # Test python (built-in tool) - should be reasoning, not MCP
+    python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
+    python_message = python_message.with_recipient("python")
+    python_message = python_message.with_channel("commentary")
+
+    python_items = harmony_to_response_output(python_message)
+
+    assert len(python_items) == 1
+    assert not isinstance(python_items[0], McpCall)
+    assert python_items[0].type == "reasoning"
+
+
+def test_parser_state_to_response_output_commentary_channel() -> None:
+    """Test parser_state_to_response_output with commentary
+    channel and various recipients."""
+    from unittest.mock import Mock
+
+    # Test 1: functions.* recipient -> should return function tool call
+    parser_func = Mock()
+    parser_func.current_content = '{"arg": "value"}'
+    parser_func.current_role = Role.ASSISTANT
+    parser_func.current_channel = "commentary"
+    parser_func.current_recipient = "functions.my_tool"
+
+    func_items = parser_state_to_response_output(parser_func)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+    assert func_items[0].name == "my_tool"
+    assert func_items[0].status == "in_progress"
+
+    # Test 2: MCP tool (not builtin) -> should return MCP call
+    parser_mcp = Mock()
+    parser_mcp.current_content = '{"path": "/tmp"}'
+    parser_mcp.current_role = Role.ASSISTANT
+    parser_mcp.current_channel = "commentary"
+    parser_mcp.current_recipient = "filesystem"
+
+    mcp_items = parser_state_to_response_output(parser_mcp)
+
+    assert len(mcp_items) == 1
+    assert isinstance(mcp_items[0], McpCall)
+    assert mcp_items[0].type == "mcp_call"
+    assert mcp_items[0].name == "filesystem"
+    assert mcp_items[0].server_label == "filesystem"
+    assert mcp_items[0].status == "in_progress"
+
+    # Test 3: Built-in tool (python)
+    # should NOT return MCP call, returns reasoning (internal tool interaction)
+    parser_builtin = Mock()
+    parser_builtin.current_content = "print('hello')"
+    parser_builtin.current_role = Role.ASSISTANT
+    parser_builtin.current_channel = "commentary"
+    parser_builtin.current_recipient = "python"
+
+    builtin_items = parser_state_to_response_output(parser_builtin)
+
+    # Built-in tools explicitly return reasoning
+    assert len(builtin_items) == 1
+    assert not isinstance(builtin_items[0], McpCall)
+    assert builtin_items[0].type == "reasoning"
+
+    # Test 4: No recipient (preamble) → should return message, not reasoning
+    parser_preamble = Mock()
+    parser_preamble.current_content = "I'll search for that information now."
+    parser_preamble.current_role = Role.ASSISTANT
+    parser_preamble.current_channel = "commentary"
+    parser_preamble.current_recipient = None
+
+    preamble_items = parser_state_to_response_output(parser_preamble)
+
+    assert len(preamble_items) == 1
+    assert isinstance(preamble_items[0], ResponseOutputMessage)
+    assert preamble_items[0].type == "message"
+    assert preamble_items[0].content[0].text == "I'll search for that information now."
+    assert preamble_items[0].status == "incomplete"  # streaming
+
+
+def test_parser_state_to_response_output_analysis_channel() -> None:
+    """Test parser_state_to_response_output with analysis
+    channel and various recipients."""
+    from unittest.mock import Mock
+
+    # Test 1: functions.* recipient -> should return function tool call
+    parser_func = Mock()
+    parser_func.current_content = '{"arg": "value"}'
+    parser_func.current_role = Role.ASSISTANT
+    parser_func.current_channel = "analysis"
+    parser_func.current_recipient = "functions.my_tool"
+
+    func_items = parser_state_to_response_output(parser_func)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+    assert func_items[0].name == "my_tool"
+    assert func_items[0].status == "in_progress"
+
+    # Test 2: MCP tool (not builtin) -> should return MCP call
+    parser_mcp = Mock()
+    parser_mcp.current_content = '{"query": "test"}'
+    parser_mcp.current_role = Role.ASSISTANT
+    parser_mcp.current_channel = "analysis"
+    parser_mcp.current_recipient = "database"
+
+    mcp_items = parser_state_to_response_output(parser_mcp)
+
+    assert len(mcp_items) == 1
+    assert isinstance(mcp_items[0], McpCall)
+    assert mcp_items[0].type == "mcp_call"
+    assert mcp_items[0].name == "database"
+    assert mcp_items[0].server_label == "database"
+    assert mcp_items[0].status == "in_progress"
+
+    # Test 3: Built-in tool (container)
+    # should NOT return MCP call, falls through to reasoning
+    parser_builtin = Mock()
+    parser_builtin.current_content = "docker run"
+    parser_builtin.current_role = Role.ASSISTANT
+    parser_builtin.current_channel = "analysis"
+    parser_builtin.current_recipient = "container"
+
+    builtin_items = parser_state_to_response_output(parser_builtin)
+
+    # Should fall through to reasoning logic
+    assert len(builtin_items) == 1
+    assert not isinstance(builtin_items[0], McpCall)
+    assert builtin_items[0].type == "reasoning"
--- a/tests/entrypoints/openai/responses/test_mcp_tools.py
+++ b/tests/entrypoints/openai/responses/test_mcp_tools.py
@@ -97,16 +97,16 @@ class TestMCPToolServerUnit:
        assert server.get_tool_description("test_server", allowed_tools=[]) is None

    def test_builtin_tools_consistency(self):
-        """MCP_BUILTIN_TOOLS must match _BUILTIN_TOOL_TO_MCP_SERVER_LABEL values."""
+        """MCP_BUILTIN_TOOLS must match BUILTIN_TOOL_TO_MCP_SERVER_LABEL values."""
        from vllm.entrypoints.openai.parser.harmony_utils import (
-            _BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
+            BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
            MCP_BUILTIN_TOOLS,
        )

-        assert set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) == MCP_BUILTIN_TOOLS, (
+        assert set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) == MCP_BUILTIN_TOOLS, (
            f"MCP_BUILTIN_TOOLS {MCP_BUILTIN_TOOLS} does not match "
-            f"_BUILTIN_TOOL_TO_MCP_SERVER_LABEL values "
-            f"{set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())}"
+            f"BUILTIN_TOOL_TO_MCP_SERVER_LABEL values "
+            f"{set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())}"
        )


--- a/vllm/entrypoints/openai/parser/harmony_utils.py
+++ b/vllm/entrypoints/openai/parser/harmony_utils.py
@@ -2,27 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import datetime
-import json
 from collections.abc import Iterable, Sequence
 from typing import Literal

-from openai.types.responses import (
-    ResponseFunctionToolCall,
-    ResponseOutputItem,
-    ResponseOutputMessage,
-    ResponseOutputText,
-    ResponseReasoningItem,
-)
-from openai.types.responses.response_function_web_search import (
-    ActionFind,
-    ActionOpenPage,
-    ActionSearch,
-    ResponseFunctionWebSearch,
-)
-from openai.types.responses.response_output_item import McpCall
-from openai.types.responses.response_reasoning_item import (
-    Content as ResponseReasoningTextContent,
-)
 from openai.types.responses.tool import Tool
 from openai_harmony import (
    Author,
@@ -38,17 +20,10 @@ from openai_harmony import (
    ToolDescription,
    load_harmony_encoding,
 )
-from openai_harmony import Message as OpenAIHarmonyMessage
-from openai_harmony import Role as OpenAIHarmonyRole

 from vllm import envs
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionToolsParam
-from vllm.entrypoints.openai.responses.protocol import (
-    ResponseInputOutputItem,
-    ResponsesRequest,
-)
 from vllm.logger import init_logger
-from vllm.utils import random_uuid

 logger = init_logger(__name__)

@@ -64,14 +39,14 @@ _harmony_encoding = None
 # they are available and requested by the user.
 # Tool args are provided by MCP tool descriptions. Output
 # of the tools are stringified.
-_BUILTIN_TOOL_TO_MCP_SERVER_LABEL: dict[str, str] = {
+BUILTIN_TOOL_TO_MCP_SERVER_LABEL: dict[str, str] = {
    "python": "code_interpreter",
    "browser": "web_search_preview",
    "container": "container",
 }

 # Derive MCP_BUILTIN_TOOLS from the canonical mapping
-MCP_BUILTIN_TOOLS: set[str] = set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())
+MCP_BUILTIN_TOOLS: set[str] = set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())


 def has_custom_tools(tool_types: set[str]) -> bool:
@@ -179,55 +154,6 @@ def get_user_message(content: str) -> Message:
    return Message.from_role_and_content(Role.USER, content)


-def parse_response_input(
-    response_msg: ResponseInputOutputItem,
-    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
-) -> Message:
-    if not isinstance(response_msg, dict):
-        response_msg = response_msg.model_dump()
-    if "type" not in response_msg or response_msg["type"] == "message":
-        role = response_msg["role"]
-        content = response_msg["content"]
-        # Add prefix for developer messages.
-        # <|start|>developer<|message|># Instructions {instructions}<|end|>
-        text_prefix = "Instructions:\n" if role == "developer" else ""
-        if isinstance(content, str):
-            msg = Message.from_role_and_content(role, text_prefix + content)
-        else:
-            contents = [TextContent(text=text_prefix + c["text"]) for c in content]
-            msg = Message.from_role_and_contents(role, contents)
-        if role == "assistant":
-            msg = msg.with_channel("final")
-    elif response_msg["type"] == "function_call_output":
-        call_id = response_msg["call_id"]
-        call_response: ResponseFunctionToolCall | None = None
-        for prev_response in reversed(prev_responses):
-            if (
-                isinstance(prev_response, ResponseFunctionToolCall)
-                and prev_response.call_id == call_id
-            ):
-                call_response = prev_response
-                break
-        if call_response is None:
-            raise ValueError(f"No call message found for {call_id}")
-        msg = Message.from_author_and_content(
-            Author.new(Role.TOOL, f"functions.{call_response.name}"),
-            response_msg["output"],
-        )
-    elif response_msg["type"] == "reasoning":
-        content = response_msg["content"]
-        assert len(content) == 1
-        msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"])
-    elif response_msg["type"] == "function_call":
-        msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"])
-        msg = msg.with_channel("commentary")
-        msg = msg.with_recipient(f"functions.{response_msg['name']}")
-        msg = msg.with_content_type("json")
-    else:
-        raise ValueError(f"Unknown input type: {response_msg['type']}")
-    return msg
-
-
 def parse_chat_inputs_to_harmony_messages(chat_msgs: list) -> list[Message]:
    """
    Parse a list of messages from request.messages in the Chat Completion API to
@@ -390,139 +316,6 @@ def parse_chat_input_to_harmony_message(
    return msgs


-def parse_input_to_harmony_message(chat_msg) -> list[Message]:
-    """Parse a message from request.previous_input_messages
-    into Harmony messages.
-
-    Supports both OpenAI chat format ({"role": "..."}) and
-    Harmony format ({"author": {"role": "..."}}).
-    """
-    if not isinstance(chat_msg, dict):
-        chat_msg = chat_msg.model_dump(exclude_none=True)
-
-    if "author" in chat_msg and isinstance(chat_msg.get("author"), dict):
-        return [_parse_harmony_format_message(chat_msg)]
-
-    return _parse_chat_format_message(chat_msg)
-
-
-def _parse_harmony_format_message(chat_msg: dict) -> Message:
-    """Reconstruct a Message from Harmony-format dict,
-    preserving channel, recipient, and content_type."""
-    author_dict = chat_msg["author"]
-    role = author_dict.get("role")
-    name = author_dict.get("name")
-
-    raw_content = chat_msg.get("content", "")
-    if isinstance(raw_content, list):
-        # TODO: Support refusal and non-text content types.
-        contents = [TextContent(text=c.get("text", "")) for c in raw_content]
-    elif isinstance(raw_content, str):
-        contents = [TextContent(text=raw_content)]
-    else:
-        contents = [TextContent(text="")]
-
-    if name:
-        msg = Message.from_author_and_contents(Author.new(Role(role), name), contents)
-    else:
-        msg = Message.from_role_and_contents(Role(role), contents)
-
-    channel = chat_msg.get("channel")
-    if channel:
-        msg = msg.with_channel(channel)
-    recipient = chat_msg.get("recipient")
-    if recipient:
-        msg = msg.with_recipient(recipient)
-    content_type = chat_msg.get("content_type")
-    if content_type:
-        msg = msg.with_content_type(content_type)
-
-    return msg
-
-
-def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
-    """Parse an OpenAI chat-format dict into Harmony messages."""
-    role = chat_msg.get("role")
-    if role is None:
-        raise ValueError(f"Message has no 'role' key: {chat_msg}")
-
-    # Assistant message with tool calls
-    tool_calls = chat_msg.get("tool_calls")
-    if role == "assistant" and tool_calls:
-        msgs: list[Message] = []
-        for call in tool_calls:
-            func = call.get("function", {})
-            name = func.get("name", "")
-            arguments = func.get("arguments", "") or ""
-            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
-            msg = msg.with_channel("commentary")
-            msg = msg.with_recipient(f"functions.{name}")
-            msg = msg.with_content_type("json")
-            msgs.append(msg)
-        return msgs
-
-    # Tool role message (tool output)
-    if role == "tool":
-        name = chat_msg.get("name", "")
-        if name and not name.startswith("functions."):
-            name = f"functions.{name}"
-        content = chat_msg.get("content", "") or ""
-        content = flatten_chat_text_content(content)
-        # NOTE: .with_recipient("assistant") is required on tool messages
-        # to match parse_chat_input_to_harmony_message behavior and ensure
-        # proper routing in the Harmony protocol.
-        msg = (
-            Message.from_author_and_content(Author.new(Role.TOOL, name), content)
-            .with_channel("commentary")
-            .with_recipient("assistant")
-        )
-        return [msg]
-
-    # Default: user/assistant/system messages
-    content = chat_msg.get("content", "")
-    if isinstance(content, str):
-        contents = [TextContent(text=content)]
-    else:
-        # TODO: Support refusal.
-        contents = [TextContent(text=c.get("text", "")) for c in content]
-    msg = Message.from_role_and_contents(role, contents)
-    return [msg]
-
-
-def construct_harmony_previous_input_messages(
-    request: ResponsesRequest,
-) -> list[OpenAIHarmonyMessage]:
-    messages: list[OpenAIHarmonyMessage] = []
-    if request.previous_input_messages:
-        for message in request.previous_input_messages:
-            # Handle both OpenAIHarmonyMessage objects and dictionary inputs
-            if isinstance(message, OpenAIHarmonyMessage):
-                message_role = message.author.role
-                # To match OpenAI, instructions, reasoning and tools are
-                # always taken from the most recent Responses API request
-                # not carried over from previous requests
-                if (
-                    message_role == OpenAIHarmonyRole.SYSTEM
-                    or message_role == OpenAIHarmonyRole.DEVELOPER
-                ):
-                    continue
-                messages.append(message)
-            else:
-                harmony_messages = parse_input_to_harmony_message(message)
-                for harmony_msg in harmony_messages:
-                    message_role = harmony_msg.author.role
-                    # To match OpenAI, instructions, reasoning and tools are
-                    # always taken from the most recent Responses API request
-                    # not carried over from previous requests
-                    if (
-                        message_role == OpenAIHarmonyRole.SYSTEM
-                        or message_role == OpenAIHarmonyRole.DEVELOPER
-                    ):
-                        continue
-                    messages.append(harmony_msg)
-    return messages
-
-
 def render_for_completion(messages: list[Message]) -> list[int]:
    conversation = Conversation.from_messages(messages)
    token_ids = get_encoding().render_conversation_for_completion(
@@ -531,313 +324,6 @@ def render_for_completion(messages: list[Message]) -> list[int]:
    return token_ids


-def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
-    """Parse browser tool calls (search, open, find) into web search items."""
-    if len(message.content) != 1:
-        raise ValueError("Invalid number of contents in browser message")
-    content = message.content[0]
-
-    # Parse JSON args (with retry detection)
-    try:
-        browser_call = json.loads(content.text)
-    except json.JSONDecodeError:
-        logger.warning(
-            "Invalid JSON in browser tool call, using error placeholder: %s",
-            content.text,
-        )
-        json_retry_output_message = (
-            f"Invalid JSON args, caught and retried: {content.text}"
-        )
-        browser_call = {
-            "query": json_retry_output_message,
-            "url": json_retry_output_message,
-            "pattern": json_retry_output_message,
-        }
-
-    # Create appropriate action based on recipient
-    if recipient == "browser.search":
-        action = ActionSearch(
-            query=f"cursor:{browser_call.get('query', '')}", type="search"
-        )
-    elif recipient == "browser.open":
-        action = ActionOpenPage(
-            url=f"cursor:{browser_call.get('url', '')}", type="open_page"
-        )
-    elif recipient == "browser.find":
-        action = ActionFind(
-            pattern=browser_call.get("pattern", ""),
-            url=f"cursor:{browser_call.get('url', '')}",
-            type="find",
-        )
-    else:
-        raise ValueError(f"Unknown browser action: {recipient}")
-
-    return ResponseFunctionWebSearch(
-        id=f"ws_{random_uuid()}",
-        action=action,
-        status="completed",
-        type="web_search_call",
-    )
-
-
-def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
-    """Parse function calls into function tool call items."""
-    function_name = recipient.split(".")[-1]
-    output_items = []
-    for content in message.content:
-        random_id = random_uuid()
-        response_item = ResponseFunctionToolCall(
-            arguments=content.text,
-            call_id=f"call_{random_id}",
-            type="function_call",
-            name=function_name,
-            id=f"fc_{random_id}",
-        )
-        output_items.append(response_item)
-    return output_items
-
-
-def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
-    """Parse reasoning/analysis content into reasoning items."""
-    output_items = []
-    for content in message.content:
-        reasoning_item = ResponseReasoningItem(
-            id=f"rs_{random_uuid()}",
-            summary=[],
-            type="reasoning",
-            content=[
-                ResponseReasoningTextContent(text=content.text, type="reasoning_text")
-            ],
-            status=None,
-        )
-        output_items.append(reasoning_item)
-    return output_items
-
-
-def _parse_final_message(message: Message) -> ResponseOutputItem:
-    """Parse final channel messages into output message items."""
-    contents = []
-    for content in message.content:
-        output_text = ResponseOutputText(
-            text=content.text,
-            annotations=[],  # TODO
-            type="output_text",
-            logprobs=None,  # TODO
-        )
-        contents.append(output_text)
-    return ResponseOutputMessage(
-        id=f"msg_{random_uuid()}",
-        content=contents,
-        role=message.author.role,
-        status="completed",
-        type="message",
-    )
-
-
-def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
-    """
-    Parse MCP recipient into (server_label, tool_name).
-
-    For dotted recipients like "repo_browser.list":
-        - server_label: "repo_browser" (namespace/server)
-        - tool_name: "list" (specific tool)
-
-    For simple recipients like "filesystem":
-        - server_label: "filesystem"
-        - tool_name: "filesystem"
-    """
-    if "." in recipient:
-        server_label = recipient.split(".")[0]
-        tool_name = recipient.split(".")[-1]
-    else:
-        server_label = recipient
-        tool_name = recipient
-    return server_label, tool_name
-
-
-def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
-    """Parse MCP calls into MCP call items."""
-    # Handle built-in tools that need server_label mapping
-    if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
-        server_label = _BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient]
-        tool_name = recipient
-    else:
-        server_label, tool_name = _parse_mcp_recipient(recipient)
-
-    output_items = []
-    for content in message.content:
-        response_item = McpCall(
-            arguments=content.text,
-            type="mcp_call",
-            name=tool_name,
-            server_label=server_label,
-            id=f"mcp_{random_uuid()}",
-            status="completed",
-        )
-        output_items.append(response_item)
-    return output_items
-
-
-def _parse_message_no_recipient(
-    message: Message,
-) -> list[ResponseOutputItem]:
-    """Parse a Harmony message with no recipient based on its channel."""
-    if message.channel == "analysis":
-        return _parse_reasoning(message)
-
-    if message.channel in ("commentary", "final"):
-        # Per Harmony format, preambles (commentary with no recipient) and
-        # final channel content are both intended to be shown to end-users.
-        # See: https://cookbook.openai.com/articles/openai-harmony
-        return [_parse_final_message(message)]
-
-    raise ValueError(f"Unknown channel: {message.channel}")
-
-
-def parse_output_message(message: Message) -> list[ResponseOutputItem]:
-    """
-    Parse a Harmony message into a list of output response items.
-    """
-    if message.author.role != "assistant":
-        # This is a message from a tool to the assistant (e.g., search result).
-        # Don't include it in the final output for now. This aligns with
-        # OpenAI's behavior on models like o4-mini.
-        return []
-
-    output_items: list[ResponseOutputItem] = []
-    recipient = message.recipient
-
-    if recipient is not None:
-        # Browser tool calls (browser.search, browser.open, browser.find)
-        if recipient.startswith("browser."):
-            output_items.append(_parse_browser_tool_call(message, recipient))
-
-        # Function calls (should only happen on commentary channel)
-        elif message.channel == "commentary" and recipient.startswith("functions."):
-            output_items.extend(_parse_function_call(message, recipient))
-
-        # Built-in MCP tools (python, browser, container)
-        elif recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
-            output_items.extend(_parse_reasoning(message))
-
-        # All other recipients are MCP calls
-        else:
-            output_items.extend(_parse_mcp_call(message, recipient))
-
-    # No recipient - handle based on channel for non-tool messages
-    else:
-        output_items.extend(_parse_message_no_recipient(message))
-
-    return output_items
-
-
-def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
-    if not parser.current_content:
-        return []
-    if parser.current_role != Role.ASSISTANT:
-        return []
-    current_recipient = parser.current_recipient
-    if current_recipient is not None and current_recipient.startswith("browser."):
-        return []
-
-    if current_recipient and parser.current_channel in ("commentary", "analysis"):
-        if current_recipient.startswith("functions."):
-            rid = random_uuid()
-            return [
-                ResponseFunctionToolCall(
-                    arguments=parser.current_content,
-                    call_id=f"call_{rid}",
-                    type="function_call",
-                    name=current_recipient.split(".")[-1],
-                    id=f"fc_{rid}",
-                    status="in_progress",
-                )
-            ]
-        # Built-in MCP tools (python, browser, container)
-        elif current_recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
-            return [
-                ResponseReasoningItem(
-                    id=f"rs_{random_uuid()}",
-                    summary=[],
-                    type="reasoning",
-                    content=[
-                        ResponseReasoningTextContent(
-                            text=parser.current_content, type="reasoning_text"
-                        )
-                    ],
-                    status=None,
-                )
-            ]
-        # All other recipients are MCP calls
-        else:
-            rid = random_uuid()
-            server_label, tool_name = _parse_mcp_recipient(current_recipient)
-            return [
-                McpCall(
-                    arguments=parser.current_content,
-                    type="mcp_call",
-                    name=tool_name,
-                    server_label=server_label,
-                    id=f"mcp_{rid}",
-                    status="in_progress",
-                )
-            ]
-
-    if parser.current_channel == "commentary":
-        # Per Harmony format, preambles (commentary with no recipient) are
-        # intended to be shown to end-users, unlike analysis channel content.
-        output_text = ResponseOutputText(
-            text=parser.current_content,
-            annotations=[],
-            type="output_text",
-            logprobs=None,
-        )
-        return [
-            ResponseOutputMessage(
-                id=f"msg_{random_uuid()}",
-                content=[output_text],
-                role="assistant",
-                status="incomplete",
-                type="message",
-            )
-        ]
-
-    if parser.current_channel == "analysis":
-        return [
-            ResponseReasoningItem(
-                id=f"rs_{random_uuid()}",
-                summary=[],
-                type="reasoning",
-                content=[
-                    ResponseReasoningTextContent(
-                        text=parser.current_content, type="reasoning_text"
-                    )
-                ],
-                status=None,
-            )
-        ]
-
-    if parser.current_channel == "final":
-        output_text = ResponseOutputText(
-            text=parser.current_content,
-            annotations=[],  # TODO
-            type="output_text",
-            logprobs=None,  # TODO
-        )
-        text_item = ResponseOutputMessage(
-            id=f"msg_{random_uuid()}",
-            content=[output_text],
-            role="assistant",
-            # if the parser still has messages (ie if the generator got cut
-            # abruptly), this should be incomplete
-            status="incomplete",
-            type="message",
-        )
-        return [text_item]
-
-    return []
-
-
 def get_stop_tokens_for_assistant_actions() -> list[int]:
    return get_encoding().stop_tokens_for_assistant_actions()

--- a/vllm/entrypoints/openai/responses/harmony.py
+++ b/vllm/entrypoints/openai/responses/harmony.py
@@ -0,0 +1,552 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Harmony ↔ Responses API conversion utilities.
+
+Handles two directions:
+  1. Response Input → Harmony Messages  (input parsing)
+  2. Harmony Messages → Response Output Items  (output parsing)
+"""
+
+import json
+
+from openai.types.responses import (
+    ResponseFunctionToolCall,
+    ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseOutputText,
+    ResponseReasoningItem,
+)
+from openai.types.responses.response_function_web_search import (
+    ActionFind,
+    ActionOpenPage,
+    ActionSearch,
+    ResponseFunctionWebSearch,
+)
+from openai.types.responses.response_output_item import McpCall
+from openai.types.responses.response_reasoning_item import (
+    Content as ResponseReasoningTextContent,
+)
+from openai_harmony import Author, Message, Role, StreamableParser, TextContent
+
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
+    flatten_chat_text_content,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponseInputOutputItem,
+    ResponsesRequest,
+)
+from vllm.logger import init_logger
+from vllm.utils import random_uuid
+
+logger = init_logger(__name__)
+
+# ---------------------------------------------------------------------------
+# 1. Private helpers for input parsing
+# ---------------------------------------------------------------------------
+
+
+def _parse_harmony_format_message(chat_msg: dict) -> Message:
+    """Reconstruct a Message from Harmony-format dict,
+    preserving channel, recipient, and content_type."""
+    author_dict = chat_msg["author"]
+    role = author_dict.get("role")
+    name = author_dict.get("name")
+
+    raw_content = chat_msg.get("content", "")
+    if isinstance(raw_content, list):
+        # TODO: Support refusal and non-text content types.
+        contents = [TextContent(text=c.get("text", "")) for c in raw_content]
+    elif isinstance(raw_content, str):
+        contents = [TextContent(text=raw_content)]
+    else:
+        contents = [TextContent(text="")]
+
+    if name:
+        msg = Message.from_author_and_contents(Author.new(Role(role), name), contents)
+    else:
+        msg = Message.from_role_and_contents(Role(role), contents)
+
+    channel = chat_msg.get("channel")
+    if channel:
+        msg = msg.with_channel(channel)
+    recipient = chat_msg.get("recipient")
+    if recipient:
+        msg = msg.with_recipient(recipient)
+    content_type = chat_msg.get("content_type")
+    if content_type:
+        msg = msg.with_content_type(content_type)
+
+    return msg
+
+
+def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
+    """Parse an OpenAI chat-format dict into Harmony messages."""
+    role = chat_msg.get("role")
+    if role is None:
+        raise ValueError(f"Message has no 'role' key: {chat_msg}")
+
+    # Assistant message with tool calls
+    tool_calls = chat_msg.get("tool_calls")
+    if role == "assistant" and tool_calls:
+        msgs: list[Message] = []
+        for call in tool_calls:
+            func = call.get("function", {})
+            name = func.get("name", "")
+            arguments = func.get("arguments", "") or ""
+            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
+            msg = msg.with_channel("commentary")
+            msg = msg.with_recipient(f"functions.{name}")
+            msg = msg.with_content_type("json")
+            msgs.append(msg)
+        return msgs
+
+    # Tool role message (tool output)
+    if role == "tool":
+        name = chat_msg.get("name", "")
+        if name and not name.startswith("functions."):
+            name = f"functions.{name}"
+        content = chat_msg.get("content", "") or ""
+        content = flatten_chat_text_content(content)
+        # NOTE: .with_recipient("assistant") is required on tool messages
+        # to match parse_chat_input_to_harmony_message behavior and ensure
+        # proper routing in the Harmony protocol.
+        msg = (
+            Message.from_author_and_content(Author.new(Role.TOOL, name), content)
+            .with_channel("commentary")
+            .with_recipient("assistant")
+        )
+        return [msg]
+
+    # Default: user/assistant/system messages
+    content = chat_msg.get("content", "")
+    if isinstance(content, str):
+        contents = [TextContent(text=content)]
+    else:
+        # TODO: Support refusal.
+        contents = [TextContent(text=c.get("text", "")) for c in content]
+    msg = Message.from_role_and_contents(role, contents)
+    return [msg]
+
+
+# ---------------------------------------------------------------------------
+# 2. Public input parsing functions
+# ---------------------------------------------------------------------------
+
+
+def response_input_to_harmony(
+    response_msg: ResponseInputOutputItem,
+    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
+) -> Message:
+    """Convert a single ResponseInputOutputItem into a Harmony Message."""
+    if not isinstance(response_msg, dict):
+        response_msg = response_msg.model_dump()
+    if "type" not in response_msg or response_msg["type"] == "message":
+        role = response_msg["role"]
+        content = response_msg["content"]
+        # Add prefix for developer messages.
+        # <|start|>developer<|message|># Instructions {instructions}<|end|>
+        text_prefix = "Instructions:\n" if role == "developer" else ""
+        if isinstance(content, str):
+            msg = Message.from_role_and_content(role, text_prefix + content)
+        else:
+            contents = [TextContent(text=text_prefix + c["text"]) for c in content]
+            msg = Message.from_role_and_contents(role, contents)
+        if role == "assistant":
+            msg = msg.with_channel("final")
+    elif response_msg["type"] == "function_call_output":
+        call_id = response_msg["call_id"]
+        call_response: ResponseFunctionToolCall | None = None
+        for prev_response in reversed(prev_responses):
+            if (
+                isinstance(prev_response, ResponseFunctionToolCall)
+                and prev_response.call_id == call_id
+            ):
+                call_response = prev_response
+                break
+        if call_response is None:
+            raise ValueError(f"No call message found for {call_id}")
+        msg = Message.from_author_and_content(
+            Author.new(Role.TOOL, f"functions.{call_response.name}"),
+            response_msg["output"],
+        )
+    elif response_msg["type"] == "reasoning":
+        content = response_msg["content"]
+        assert len(content) == 1
+        msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"])
+    elif response_msg["type"] == "function_call":
+        msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"])
+        msg = msg.with_channel("commentary")
+        msg = msg.with_recipient(f"functions.{response_msg['name']}")
+        msg = msg.with_content_type("json")
+    else:
+        raise ValueError(f"Unknown input type: {response_msg['type']}")
+    return msg
+
+
+def response_previous_input_to_harmony(chat_msg) -> list[Message]:
+    """Parse a message from request.previous_input_messages
+    into Harmony messages.
+
+    Supports both OpenAI chat format ({"role": "..."}) and
+    Harmony format ({"author": {"role": "..."}}).
+    """
+    if not isinstance(chat_msg, dict):
+        chat_msg = chat_msg.model_dump(exclude_none=True)
+
+    if "author" in chat_msg and isinstance(chat_msg.get("author"), dict):
+        return [_parse_harmony_format_message(chat_msg)]
+
+    return _parse_chat_format_message(chat_msg)
+
+
+def construct_harmony_previous_input_messages(
+    request: ResponsesRequest,
+) -> list[Message]:
+    """Build a Harmony message list from request.previous_input_messages.
+
+    Filters out system/developer messages to match OpenAI behavior where
+    instructions are always taken from the most recent Responses API request.
+    """
+    messages: list[Message] = []
+    if request.previous_input_messages:
+        for message in request.previous_input_messages:
+            # Handle both Message objects and dictionary inputs
+            if isinstance(message, Message):
+                message_role = message.author.role
+                if message_role == Role.SYSTEM or message_role == Role.DEVELOPER:
+                    continue
+                messages.append(message)
+            else:
+                harmony_messages = response_previous_input_to_harmony(message)
+                for harmony_msg in harmony_messages:
+                    message_role = harmony_msg.author.role
+                    if message_role == Role.SYSTEM or message_role == Role.DEVELOPER:
+                        continue
+                    messages.append(harmony_msg)
+    return messages
+
+
+# ---------------------------------------------------------------------------
+# 3. Private helpers for output parsing
+# ---------------------------------------------------------------------------
+
+
+def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
+    """Parse browser tool calls (search, open, find) into web search items."""
+    if len(message.content) != 1:
+        raise ValueError("Invalid number of contents in browser message")
+    content = message.content[0]
+
+    # Parse JSON args (with retry detection)
+    try:
+        browser_call = json.loads(content.text)
+    except json.JSONDecodeError:
+        logger.warning(
+            "Invalid JSON in browser tool call, using error placeholder: %s",
+            content.text,
+        )
+        json_retry_output_message = (
+            f"Invalid JSON args, caught and retried: {content.text}"
+        )
+        browser_call = {
+            "query": json_retry_output_message,
+            "url": json_retry_output_message,
+            "pattern": json_retry_output_message,
+        }
+
+    # Create appropriate action based on recipient
+    if recipient == "browser.search":
+        action = ActionSearch(
+            query=f"cursor:{browser_call.get('query', '')}", type="search"
+        )
+    elif recipient == "browser.open":
+        action = ActionOpenPage(
+            url=f"cursor:{browser_call.get('url', '')}", type="open_page"
+        )
+    elif recipient == "browser.find":
+        action = ActionFind(
+            pattern=browser_call.get("pattern", ""),
+            url=f"cursor:{browser_call.get('url', '')}",
+            type="find",
+        )
+    else:
+        raise ValueError(f"Unknown browser action: {recipient}")
+
+    return ResponseFunctionWebSearch(
+        id=f"ws_{random_uuid()}",
+        action=action,
+        status="completed",
+        type="web_search_call",
+    )
+
+
+def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
+    """Parse function calls into function tool call items."""
+    function_name = recipient.split(".")[-1]
+    output_items = []
+    for content in message.content:
+        random_id = random_uuid()
+        response_item = ResponseFunctionToolCall(
+            arguments=content.text,
+            call_id=f"call_{random_id}",
+            type="function_call",
+            name=function_name,
+            id=f"fc_{random_id}",
+        )
+        output_items.append(response_item)
+    return output_items
+
+
+def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
+    """Parse reasoning/analysis content into reasoning items."""
+    output_items = []
+    for content in message.content:
+        reasoning_item = ResponseReasoningItem(
+            id=f"rs_{random_uuid()}",
+            summary=[],
+            type="reasoning",
+            content=[
+                ResponseReasoningTextContent(text=content.text, type="reasoning_text")
+            ],
+            status=None,
+        )
+        output_items.append(reasoning_item)
+    return output_items
+
+
+def _parse_final_message(message: Message) -> ResponseOutputItem:
+    """Parse final channel messages into output message items."""
+    contents = []
+    for content in message.content:
+        output_text = ResponseOutputText(
+            text=content.text,
+            annotations=[],  # TODO
+            type="output_text",
+            logprobs=None,  # TODO
+        )
+        contents.append(output_text)
+    return ResponseOutputMessage(
+        id=f"msg_{random_uuid()}",
+        content=contents,
+        role=message.author.role,
+        status="completed",
+        type="message",
+    )
+
+
+def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
+    """Parse MCP recipient into (server_label, tool_name).
+
+    For dotted recipients like "repo_browser.list":
+        - server_label: "repo_browser" (namespace/server)
+        - tool_name: "list" (specific tool)
+
+    For simple recipients like "filesystem":
+        - server_label: "filesystem"
+        - tool_name: "filesystem"
+    """
+    if "." in recipient:
+        server_label = recipient.split(".")[0]
+        tool_name = recipient.split(".")[-1]
+    else:
+        server_label = recipient
+        tool_name = recipient
+    return server_label, tool_name
+
+
+def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
+    """Parse MCP calls into MCP call items."""
+    # Handle built-in tools that need server_label mapping
+    if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+        server_label = BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient]
+        tool_name = recipient
+    else:
+        server_label, tool_name = _parse_mcp_recipient(recipient)
+
+    output_items = []
+    for content in message.content:
+        response_item = McpCall(
+            arguments=content.text,
+            type="mcp_call",
+            name=tool_name,
+            server_label=server_label,
+            id=f"mcp_{random_uuid()}",
+            status="completed",
+        )
+        output_items.append(response_item)
+    return output_items
+
+
+def _parse_message_no_recipient(
+    message: Message,
+) -> list[ResponseOutputItem]:
+    """Parse a Harmony message with no recipient based on its channel."""
+    if message.channel == "analysis":
+        return _parse_reasoning(message)
+
+    if message.channel in ("commentary", "final"):
+        # Per Harmony format, preambles (commentary with no recipient) and
+        # final channel content are both intended to be shown to end-users.
+        # See: https://cookbook.openai.com/articles/openai-harmony
+        return [_parse_final_message(message)]
+
+    raise ValueError(f"Unknown channel: {message.channel}")
+
+
+# ---------------------------------------------------------------------------
+# 4. Public output parsing functions
+# ---------------------------------------------------------------------------
+
+
+def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
+    """Parse a Harmony message into a list of output response items.
+
+    This is the main dispatcher that routes based on channel and recipient.
+    """
+    if message.author.role != "assistant":
+        # This is a message from a tool to the assistant (e.g., search result).
+        # Don't include it in the final output for now. This aligns with
+        # OpenAI's behavior on models like o4-mini.
+        return []
+
+    output_items: list[ResponseOutputItem] = []
+    recipient = message.recipient
+
+    if recipient is not None:
+        # Browser tool calls (browser.search, browser.open, browser.find)
+        if recipient.startswith("browser."):
+            output_items.append(_parse_browser_tool_call(message, recipient))
+
+        # Function calls (should only happen on commentary channel)
+        elif message.channel == "commentary" and recipient.startswith("functions."):
+            output_items.extend(_parse_function_call(message, recipient))
+
+        # Built-in MCP tools (python, browser, container)
+        elif recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+            output_items.extend(_parse_reasoning(message))
+
+        # All other recipients are MCP calls
+        else:
+            output_items.extend(_parse_mcp_call(message, recipient))
+
+    # No recipient - handle based on channel for non-tool messages
+    else:
+        output_items.extend(_parse_message_no_recipient(message))
+
+    return output_items
+
+
+def parser_state_to_response_output(
+    parser: StreamableParser,
+) -> list[ResponseOutputItem]:
+    """Extract in-progress response items from incomplete parser state.
+
+    Called when the parser has buffered content that hasn't formed a
+    complete message yet (e.g., generation was cut short).
+    """
+    if not parser.current_content:
+        return []
+    if parser.current_role != Role.ASSISTANT:
+        return []
+    current_recipient = parser.current_recipient
+    if current_recipient is not None and current_recipient.startswith("browser."):
+        return []
+
+    if current_recipient and parser.current_channel in ("commentary", "analysis"):
+        if current_recipient.startswith("functions."):
+            rid = random_uuid()
+            return [
+                ResponseFunctionToolCall(
+                    arguments=parser.current_content,
+                    call_id=f"call_{rid}",
+                    type="function_call",
+                    name=current_recipient.split(".")[-1],
+                    id=f"fc_{rid}",
+                    status="in_progress",
+                )
+            ]
+        # Built-in MCP tools (python, browser, container)
+        elif current_recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+            return [
+                ResponseReasoningItem(
+                    id=f"rs_{random_uuid()}",
+                    summary=[],
+                    type="reasoning",
+                    content=[
+                        ResponseReasoningTextContent(
+                            text=parser.current_content, type="reasoning_text"
+                        )
+                    ],
+                    status=None,
+                )
+            ]
+        # All other recipients are MCP calls
+        else:
+            rid = random_uuid()
+            server_label, tool_name = _parse_mcp_recipient(current_recipient)
+            return [
+                McpCall(
+                    arguments=parser.current_content,
+                    type="mcp_call",
+                    name=tool_name,
+                    server_label=server_label,
+                    id=f"mcp_{rid}",
+                    status="in_progress",
+                )
+            ]
+
+    if parser.current_channel == "commentary":
+        # Per Harmony format, preambles (commentary with no recipient) are
+        # intended to be shown to end-users, unlike analysis channel content.
+        output_text = ResponseOutputText(
+            text=parser.current_content,
+            annotations=[],
+            type="output_text",
+            logprobs=None,
+        )
+        return [
+            ResponseOutputMessage(
+                id=f"msg_{random_uuid()}",
+                content=[output_text],
+                role="assistant",
+                status="incomplete",
+                type="message",
+            )
+        ]
+
+    if parser.current_channel == "analysis":
+        return [
+            ResponseReasoningItem(
+                id=f"rs_{random_uuid()}",
+                summary=[],
+                type="reasoning",
+                content=[
+                    ResponseReasoningTextContent(
+                        text=parser.current_content, type="reasoning_text"
+                    )
+                ],
+                status=None,
+            )
+        ]
+
+    if parser.current_channel == "final":
+        output_text = ResponseOutputText(
+            text=parser.current_content,
+            annotations=[],  # TODO
+            type="output_text",
+            logprobs=None,  # TODO
+        )
+        text_item = ResponseOutputMessage(
+            id=f"msg_{random_uuid()}",
+            content=[output_text],
+            role="assistant",
+            # if the parser still has messages (ie if the generator got cut
+            # abruptly), this should be incomplete
+            status="incomplete",
+            type="message",
+        )
+        return [text_item]
+
+    return []
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -58,15 +58,11 @@ from vllm.entrypoints.openai.engine.serving import (
 )
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
 from vllm.entrypoints.openai.parser.harmony_utils import (
-    construct_harmony_previous_input_messages,
    get_developer_message,
    get_stop_tokens_for_assistant_actions,
    get_system_message,
    get_user_message,
    has_custom_tools,
-    parse_output_message,
-    parse_remaining_state,
-    parse_response_input,
    render_for_completion,
 )
 from vllm.entrypoints.openai.responses.context import (
@@ -76,6 +72,12 @@ from vllm.entrypoints.openai.responses.context import (
    SimpleContext,
    StreamingHarmonyContext,
 )
+from vllm.entrypoints.openai.responses.harmony import (
+    construct_harmony_previous_input_messages,
+    harmony_to_response_output,
+    parser_state_to_response_output,
+    response_input_to_harmony,
+)
 from vllm.entrypoints.openai.responses.protocol import (
    InputTokensDetails,
    OutputTokensDetails,
@@ -954,9 +956,9 @@ class OpenAIServingResponses(OpenAIServing):
        output_items: list[ResponseOutputItem] = []
        num_init_messages = context.num_init_messages
        for msg in context.messages[num_init_messages:]:
-            output_items.extend(parse_output_message(msg))
+            output_items.extend(harmony_to_response_output(msg))
        # Handle the generation stopped in the middle (if any).
-        last_items = parse_remaining_state(context.parser)
+        last_items = parser_state_to_response_output(context.parser)
        if last_items:
            output_items.extend(last_items)
        return output_items
@@ -1103,13 +1105,13 @@ class OpenAIServingResponses(OpenAIServing):
            else:
                prev_outputs = []
            for response_msg in request.input:
-                new_msg = parse_response_input(response_msg, prev_outputs)
+                new_msg = response_input_to_harmony(response_msg, prev_outputs)
                if new_msg.author.role != "system":
                    messages.append(new_msg)

                # User passes in a tool call request and its output. We need
-                # to add the tool call request to prev_outputs so that the
-                # parse_response_input can find the tool call request when
+                # to add the tool call request to prev_outputs so that
+                # response_input_to_harmony can find the tool call request when
                # parsing the tool call output.
                if isinstance(response_msg, ResponseFunctionToolCall):
                    prev_outputs.append(response_msg)