diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py
index 1d34fc51a..b73a0b074 100644
--- a/tests/entrypoints/openai/parser/test_harmony_utils.py
+++ b/tests/entrypoints/openai/parser/test_harmony_utils.py
@@ -2,7 +2,11 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import pytest
-from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
+from openai.types.responses import (
+    ResponseFunctionToolCall,
+    ResponseOutputMessage,
+    ResponseReasoningItem,
+)
 from openai.types.responses.response_output_item import McpCall
 from openai_harmony import Author, Message, Role, TextContent
 
@@ -10,6 +14,7 @@ from tests.entrypoints.openai.utils import verify_harmony_messages
 from vllm.entrypoints.openai.parser.harmony_utils import (
     auto_drop_analysis_messages,
     get_encoding,
+    get_system_message,
     has_custom_tools,
     parse_chat_input_to_harmony_message,
     parse_chat_output,
@@ -840,15 +845,58 @@ class TestParseChatOutput:
         assert reasoning == "I've thought hard about this."
         assert final_content == "The answer is 4."
 
+    def test_parse_chat_output_commentary_with_recipient_excluded(self) -> None:
+        """Commentary with a recipient (tool call) should not appear in
+        final_content — those are handled separately by the tool parser.
+
+        The first message is a preamble (visible), the second is a tool
+        call (excluded). Only the preamble should appear in final_content.
+        """
+        harmony_str = (
+            "<|channel|>commentary"
+            "<|message|>Let me check the weather.<|end|>"
+            "<|start|>assistant to=functions.get_weather"
+            "<|channel|>commentary"
+            '<|message|>{"location": "SF"}<|end|>'
+        )
+        token_ids = get_encoding().encode(harmony_str, allowed_special="all")
+        reasoning, final_content, _ = parse_chat_output(token_ids)
+        assert reasoning is None
+        assert final_content == "Let me check the weather."
+
+    def test_parse_chat_output_interrupted_preamble(self) -> None:
+        """Partial/interrupted preamble (commentary without recipient) should
+        appear in final_content, not reasoning."""
+        harmony_str = "<|channel|>commentary<|message|>I'll search for that"
+        token_ids = get_encoding().encode(harmony_str, allowed_special="all")
+        reasoning, final_content, _ = parse_chat_output(token_ids)
+        assert reasoning is None
+        assert final_content == "I'll search for that"
+
+    def test_parse_chat_output_preamble_then_final(self) -> None:
+        """Preamble followed by a final message should both appear in
+        final_content, joined by newline."""
+        harmony_str = (
+            "<|channel|>commentary"
+            "<|message|>Let me look that up.<|end|>"
+            "<|start|>assistant<|channel|>final"
+            "<|message|>The answer is 42.<|end|>"
+        )
+        token_ids = get_encoding().encode(harmony_str, allowed_special="all")
+        reasoning, final_content, _ = parse_chat_output(token_ids)
+        assert reasoning is None
+        assert final_content == "Let me look that up.\nThe answer is 42."
+
 
 class TestParseOutputMessage:
     """Tests for parse_output_message function."""
 
-    def test_commentary_with_no_recipient_creates_reasoning(self):
-        """Test that commentary with recipient=None (preambles) creates reasoning items.
+    def test_commentary_with_no_recipient_creates_message(self):
+        """Test that commentary with recipient=None (preambles) creates message items.
 
-        Per Harmony format, commentary channel can contain preambles to calling
-        multiple functions - explanatory text with no recipient.
+        Per Harmony format, preambles are intended to be shown to end-users,
+        unlike analysis channel content which is hidden reasoning.
+        See: https://cookbook.openai.com/articles/openai-harmony
         """
         message = Message.from_role_and_content(
             Role.ASSISTANT, "I will now search for the weather information."
@@ -859,13 +907,16 @@ class TestParseOutputMessage:
         output_items = parse_output_message(message)
 
         assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
-        assert output_items[0].type == "reasoning"
+        assert isinstance(output_items[0], ResponseOutputMessage)
+        assert output_items[0].type == "message"
+        assert output_items[0].role == "assistant"
+        assert output_items[0].status == "completed"
+        assert len(output_items[0].content) == 1
+        assert output_items[0].content[0].type == "output_text"
         assert (
             output_items[0].content[0].text
             == "I will now search for the weather information."
         )
-        assert output_items[0].content[0].type == "reasoning_text"
 
     def test_commentary_with_function_recipient_creates_function_call(self):
         """Test commentary with recipient='functions.X' creates function calls."""
@@ -944,7 +995,7 @@ class TestParseOutputMessage:
         output_items = parse_output_message(message)
 
         assert len(output_items) == 1
-        assert isinstance(output_items[0], ResponseReasoningItem)
+        assert isinstance(output_items[0], ResponseOutputMessage)
         assert output_items[0].content[0].text == ""
 
     def test_commentary_with_multiple_contents_and_no_recipient(self):
@@ -958,10 +1009,13 @@ class TestParseOutputMessage:
 
         output_items = parse_output_message(message)
 
-        assert len(output_items) == 2
-        assert all(isinstance(item, ResponseReasoningItem) for item in output_items)
+        # _parse_final_message returns single ResponseOutputMessage with
+        # multiple contents
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseOutputMessage)
+        assert len(output_items[0].content) == 2
         assert output_items[0].content[0].text == "Step 1: Analyze the request"
-        assert output_items[1].content[0].text == "Step 2: Prepare to call functions"
+        assert output_items[0].content[1].text == "Step 2: Prepare to call functions"
 
     def test_commentary_with_multiple_function_calls(self):
         """Test multiple function calls in commentary channel."""
@@ -1133,7 +1187,7 @@ def test_parse_remaining_state_commentary_channel() -> None:
     assert mcp_items[0].status == "in_progress"
 
     # Test 3: Built-in tool (python)
-    # should NOT return MCP call, falls through to reasoning
+    # should NOT return MCP call, returns reasoning (internal tool interaction)
     parser_builtin = Mock()
     parser_builtin.current_content = "print('hello')"
     parser_builtin.current_role = Role.ASSISTANT
@@ -1142,11 +1196,26 @@ def test_parse_remaining_state_commentary_channel() -> None:
 
     builtin_items = parse_remaining_state(parser_builtin)
 
-    # Should fall through to reasoning logic
+    # Built-in tools explicitly return reasoning
     assert len(builtin_items) == 1
     assert not isinstance(builtin_items[0], McpCall)
     assert builtin_items[0].type == "reasoning"
 
+    # Test 4: No recipient (preamble) → should return message, not reasoning
+    parser_preamble = Mock()
+    parser_preamble.current_content = "I'll search for that information now."
+    parser_preamble.current_role = Role.ASSISTANT
+    parser_preamble.current_channel = "commentary"
+    parser_preamble.current_recipient = None
+
+    preamble_items = parse_remaining_state(parser_preamble)
+
+    assert len(preamble_items) == 1
+    assert isinstance(preamble_items[0], ResponseOutputMessage)
+    assert preamble_items[0].type == "message"
+    assert preamble_items[0].content[0].text == "I'll search for that information now."
+    assert preamble_items[0].status == "incomplete"  # streaming
+
 
 def test_parse_remaining_state_analysis_channel() -> None:
     """Test parse_remaining_state with analysis channel and various recipients."""
@@ -1199,3 +1268,29 @@ def test_parse_remaining_state_analysis_channel() -> None:
     assert len(builtin_items) == 1
     assert not isinstance(builtin_items[0], McpCall)
     assert builtin_items[0].type == "reasoning"
+
+
+class TestGetSystemMessage:
+    """Tests for get_system_message channel configuration."""
+
+    def test_commentary_channel_present_without_custom_tools(self) -> None:
+        """Commentary channel must be valid even without custom tools."""
+        sys_msg = get_system_message(with_custom_tools=False)
+        valid_channels = sys_msg.content[0].channel_config.valid_channels
+        assert "commentary" in valid_channels
+
+    def test_commentary_channel_present_with_custom_tools(self) -> None:
+        """Commentary channel present when custom tools are enabled."""
+        sys_msg = get_system_message(with_custom_tools=True)
+        valid_channels = sys_msg.content[0].channel_config.valid_channels
+        assert "commentary" in valid_channels
+
+    def test_all_standard_channels_present(self) -> None:
+        """All three standard Harmony channels should always be valid."""
+        for with_tools in (True, False):
+            sys_msg = get_system_message(with_custom_tools=with_tools)
+            valid_channels = sys_msg.content[0].channel_config.valid_channels
+            for channel in ("analysis", "commentary", "final"):
+                assert channel in valid_channels, (
+                    f"{channel} missing when with_custom_tools={with_tools}"
+                )
diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py
index af7de2026..78419c92a 100644
--- a/tests/entrypoints/openai/responses/test_harmony.py
+++ b/tests/entrypoints/openai/responses/test_harmony.py
@@ -712,15 +712,14 @@ async def test_function_calling_required(client: OpenAI, model_name: str):
 async def test_system_message_with_tools(client: OpenAI, model_name: str):
     from vllm.entrypoints.openai.parser.harmony_utils import get_system_message
 
-    # Test with custom tools enabled - commentary channel should be available
-    sys_msg = get_system_message(with_custom_tools=True)
-    valid_channels = sys_msg.content[0].channel_config.valid_channels
-    assert "commentary" in valid_channels
-
-    # Test with custom tools disabled - commentary channel should be removed
-    sys_msg = get_system_message(with_custom_tools=False)
-    valid_channels = sys_msg.content[0].channel_config.valid_channels
-    assert "commentary" not in valid_channels
+    # Commentary channel should always be present (needed for preambles)
+    # regardless of whether custom tools are enabled
+    for with_tools in (True, False):
+        sys_msg = get_system_message(with_custom_tools=with_tools)
+        valid_channels = sys_msg.content[0].channel_config.valid_channels
+        assert "commentary" in valid_channels, (
+            f"commentary channel missing when with_custom_tools={with_tools}"
+        )
 
 
 @pytest.mark.asyncio
diff --git a/tests/entrypoints/openai/responses/test_mcp_tools.py b/tests/entrypoints/openai/responses/test_mcp_tools.py
index add199b61..310af4308 100644
--- a/tests/entrypoints/openai/responses/test_mcp_tools.py
+++ b/tests/entrypoints/openai/responses/test_mcp_tools.py
@@ -172,13 +172,13 @@ class TestMCPEnabled:
             recipient = message.get("recipient")
             if recipient and recipient.startswith("python"):
                 tool_call_found = True
-                assert message.get("channel") == "analysis"
+                assert message.get("channel") == "commentary"
             author = message.get("author", {})
             if author.get("role") == "tool" and (author.get("name") or "").startswith(
                 "python"
             ):
                 tool_response_found = True
-                assert message.get("channel") == "analysis"
+                assert message.get("channel") == "commentary"
 
         assert tool_call_found, (
             f"No Python tool call found. "
diff --git a/tests/entrypoints/openai/test_serving_chat_stream_harmony.py b/tests/entrypoints/openai/test_serving_chat_stream_harmony.py
index 21d3d02ce..9f8c36f04 100644
--- a/tests/entrypoints/openai/test_serving_chat_stream_harmony.py
+++ b/tests/entrypoints/openai/test_serving_chat_stream_harmony.py
@@ -180,20 +180,13 @@ class TestExtractHarmonyStreamingDelta:
 
         assert delta_message.tool_calls[0].index == 1
 
-    @pytest.mark.parametrize(
-        "channel,recipient",
-        [
-            ("commentary", None),
-            ("commentary", "browser.search"),
-        ],
-    )
-    def test_returns_tool_call_preambles(self, channel, recipient):
-        """Test that invalid tool recipient on commentary is treated as content."""
+    def test_returns_preambles_as_content(self):
+        """Test that commentary with no recipient (preamble) is user content."""
         parser = MockStreamableParser()
         delta_text = "some text"
 
         token_states = [
-            TokenState(channel=channel, recipient=recipient, text=delta_text)
+            TokenState(channel="commentary", recipient=None, text=delta_text)
         ]
 
         delta_message, tools_streamed = extract_harmony_streaming_delta(
@@ -211,6 +204,7 @@ class TestExtractHarmonyStreamingDelta:
         [
             (None, None),
             ("unknown_channel", None),
+            ("commentary", "browser.search"),
         ],
     )
     def test_returns_none_for_invalid_inputs(self, channel, recipient):
diff --git a/tests/entrypoints/openai/test_serving_responses.py b/tests/entrypoints/openai/test_serving_responses.py
index 5cf07ac0f..291bfd442 100644
--- a/tests/entrypoints/openai/test_serving_responses.py
+++ b/tests/entrypoints/openai/test_serving_responses.py
@@ -26,6 +26,9 @@ from vllm.entrypoints.openai.responses.serving import (
     _extract_allowed_tools_from_mcp_requests,
     extract_tool_types,
 )
+from vllm.entrypoints.openai.responses.streaming_events import (
+    StreamingState,
+)
 from vllm.inputs.data import TokensPrompt
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.sampling_params import SamplingParams
@@ -439,3 +442,115 @@ class TestExtractAllowedToolsFromMcpRequests:
             "server1": ["tool1"],
             "server2": ["tool2"],
         }
+
+
+class TestHarmonyPreambleStreaming:
+    """Tests for preamble (commentary with no recipient) streaming events."""
+
+    @staticmethod
+    def _make_ctx(*, channel, recipient, delta="hello"):
+        """Build a lightweight mock StreamingHarmonyContext."""
+        ctx = MagicMock()
+        ctx.last_content_delta = delta
+        ctx.parser.current_channel = channel
+        ctx.parser.current_recipient = recipient
+        return ctx
+
+    @staticmethod
+    def _make_previous_item(*, channel, recipient, text="preamble text"):
+        """Build a lightweight mock previous_item (openai_harmony Message)."""
+        content_part = MagicMock()
+        content_part.text = text
+        item = MagicMock()
+        item.channel = channel
+        item.recipient = recipient
+        item.content = [content_part]
+        return item
+
+    def test_preamble_delta_emits_text_events(self) -> None:
+        """commentary + recipient=None should emit output_text.delta events."""
+        from vllm.entrypoints.openai.responses.streaming_events import (
+            emit_content_delta_events,
+        )
+
+        ctx = self._make_ctx(channel="commentary", recipient=None)
+        state = StreamingState()
+
+        events = emit_content_delta_events(ctx, state)
+
+        type_names = [e.type for e in events]
+        assert "response.output_text.delta" in type_names
+        assert "response.output_item.added" in type_names
+
+    def test_preamble_delta_second_token_no_added(self) -> None:
+        """Second preamble token should emit delta only, not added again."""
+        from vllm.entrypoints.openai.responses.streaming_events import (
+            emit_content_delta_events,
+        )
+
+        ctx = self._make_ctx(channel="commentary", recipient=None, delta="w")
+        state = StreamingState()
+        state.sent_output_item_added = True
+        state.current_item_id = "msg_test"
+        state.current_content_index = 0
+
+        events = emit_content_delta_events(ctx, state)
+
+        type_names = [e.type for e in events]
+        assert "response.output_text.delta" in type_names
+        assert "response.output_item.added" not in type_names
+
+    def test_commentary_with_function_recipient_not_preamble(self) -> None:
+        """commentary + recipient='functions.X' must NOT use preamble path."""
+        from vllm.entrypoints.openai.responses.streaming_events import (
+            emit_content_delta_events,
+        )
+
+        ctx = self._make_ctx(
+            channel="commentary",
+            recipient="functions.get_weather",
+        )
+        state = StreamingState()
+
+        events = emit_content_delta_events(ctx, state)
+
+        type_names = [e.type for e in events]
+        assert "response.output_text.delta" not in type_names
+
+    def test_preamble_done_emits_text_done_events(self) -> None:
+        """Completed preamble should emit text done + content_part done +
+        output_item done, same shape as final channel."""
+        from vllm.entrypoints.openai.responses.streaming_events import (
+            emit_previous_item_done_events,
+        )
+
+        previous = self._make_previous_item(channel="commentary", recipient=None)
+        state = StreamingState()
+        state.current_item_id = "msg_test"
+        state.current_output_index = 0
+        state.current_content_index = 0
+
+        events = emit_previous_item_done_events(previous, state)
+
+        type_names = [e.type for e in events]
+        assert "response.output_text.done" in type_names
+        assert "response.content_part.done" in type_names
+        assert "response.output_item.done" in type_names
+
+    def test_commentary_with_recipient_no_preamble_done(self) -> None:
+        """commentary + recipient='functions.X' should route to function call
+        done, not preamble done."""
+        from vllm.entrypoints.openai.responses.streaming_events import (
+            emit_previous_item_done_events,
+        )
+
+        previous = self._make_previous_item(
+            channel="commentary", recipient="functions.get_weather"
+        )
+        state = StreamingState()
+        state.current_item_id = "fc_test"
+
+        events = emit_previous_item_done_events(previous, state)
+
+        type_names = [e.type for e in events]
+        assert "response.output_text.done" not in type_names
diff --git a/tests/entrypoints/test_context.py b/tests/entrypoints/test_context.py
index 1ab2b5edb..b1c8df4fa 100644
--- a/tests/entrypoints/test_context.py
+++ b/tests/entrypoints/test_context.py
@@ -236,6 +236,44 @@ def test_reasoning_tokens_counting(mock_parser):
     assert context.num_output_tokens == 4
 
 
+def test_preamble_tokens_not_counted_as_reasoning(mock_parser):
+    """Preambles (commentary with no recipient) are visible user text,
+    not hidden reasoning. They must NOT inflate num_reasoning_tokens."""
+    context = HarmonyContext(messages=[], available_tools=[])
+
+    mock_parser.current_channel = "commentary"
+    mock_parser.current_recipient = None  # preamble
+
+    mock_output = create_mock_request_output(
+        prompt_token_ids=[1, 2, 3],
+        output_token_ids=[4, 5, 6],
+        num_cached_tokens=0,
+    )
+    context.append_output(mock_output)
+
+    assert context.num_reasoning_tokens == 0
+    assert context.num_output_tokens == 3
+
+
+def test_commentary_with_recipient_counted_as_reasoning(mock_parser):
+    """Commentary directed at a tool (recipient != None) is hidden from
+    the user, so it should still count as reasoning tokens."""
+    context = HarmonyContext(messages=[], available_tools=[])
+
+    mock_parser.current_channel = "commentary"
+    mock_parser.current_recipient = "python"
+
+    mock_output = create_mock_request_output(
+        prompt_token_ids=[1, 2, 3],
+        output_token_ids=[4, 5, 6],
+        num_cached_tokens=0,
+    )
+    context.append_output(mock_output)
+
+    assert context.num_reasoning_tokens == 3
+    assert context.num_output_tokens == 3
+
+
 def test_zero_tokens_edge_case():
     """Test behavior with all zero token counts."""
     context = HarmonyContext(messages=[], available_tools=[])
diff --git a/vllm/entrypoints/openai/chat_completion/stream_harmony.py b/vllm/entrypoints/openai/chat_completion/stream_harmony.py
index 4dbdddd20..87f2f9b92 100644
--- a/vllm/entrypoints/openai/chat_completion/stream_harmony.py
+++ b/vllm/entrypoints/openai/chat_completion/stream_harmony.py
@@ -147,7 +147,7 @@ def extract_harmony_streaming_delta(
                         function=DeltaFunctionCall(arguments=group.text),
                     )
                 )
-        elif group.channel == "commentary":
+        elif group.channel == "commentary" and group.recipient is None:
             # Tool call preambles meant to be shown to the user
             combined_content += group.text
             content_encountered = True
diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py
index 486873db8..9dfd5f518 100644
--- a/vllm/entrypoints/openai/parser/harmony_utils.py
+++ b/vllm/entrypoints/openai/parser/harmony_utils.py
@@ -26,7 +26,6 @@ from openai.types.responses.response_reasoning_item import (
 from openai.types.responses.tool import Tool
 from openai_harmony import (
     Author,
-    ChannelConfig,
     Conversation,
     DeveloperContent,
     HarmonyEncodingName,
@@ -126,13 +125,6 @@ def get_system_message(
         sys_msg_content = sys_msg_content.with_tools(python_description)
     if container_description is not None:
         sys_msg_content = sys_msg_content.with_tools(container_description)
-    if not with_custom_tools:
-        channel_config = sys_msg_content.channel_config
-        invalid_channel = "commentary"
-        new_config = ChannelConfig.require_channels(
-            [c for c in channel_config.valid_channels if c != invalid_channel]
-        )
-        sys_msg_content = sys_msg_content.with_channel_config(new_config)
     sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
     return sys_msg
 
@@ -686,6 +678,22 @@ def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem
     return output_items
 
 
+def _parse_message_no_recipient(
+    message: Message,
+) -> list[ResponseOutputItem]:
+    """Parse a Harmony message with no recipient based on its channel."""
+    if message.channel == "analysis":
+        return _parse_reasoning(message)
+
+    if message.channel in ("commentary", "final"):
+        # Per Harmony format, preambles (commentary with no recipient) and
+        # final channel content are both intended to be shown to end-users.
+        # See: https://cookbook.openai.com/articles/openai-harmony
+        return [_parse_final_message(message)]
+
+    raise ValueError(f"Unknown channel: {message.channel}")
+
+
 def parse_output_message(message: Message) -> list[ResponseOutputItem]:
     """
     Parse a Harmony message into a list of output response items.
@@ -717,19 +725,8 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
             output_items.extend(_parse_mcp_call(message, recipient))
 
     # No recipient - handle based on channel for non-tool messages
-    elif message.channel == "analysis":
-        output_items.extend(_parse_reasoning(message))
-
-    elif message.channel == "commentary":
-        # Per Harmony format, commentary channel can contain preambles to calling
-        # multiple functions - explanatory text with no recipient
-        output_items.extend(_parse_reasoning(message))
-
-    elif message.channel == "final":
-        output_items.append(_parse_final_message(message))
-
     else:
-        raise ValueError(f"Unknown channel: {message.channel}")
+        output_items.extend(_parse_message_no_recipient(message))
 
     return output_items
 
@@ -786,7 +783,26 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
                 )
             ]
 
-    if parser.current_channel in ("commentary", "analysis"):
+    if parser.current_channel == "commentary":
+        # Per Harmony format, preambles (commentary with no recipient) are
+        # intended to be shown to end-users, unlike analysis channel content.
+        output_text = ResponseOutputText(
+            text=parser.current_content,
+            annotations=[],
+            type="output_text",
+            logprobs=None,
+        )
+        return [
+            ResponseOutputMessage(
+                id=f"msg_{random_uuid()}",
+                content=[output_text],
+                role="assistant",
+                status="incomplete",
+                type="message",
+            )
+        ]
+
+    if parser.current_channel == "analysis":
         return [
             ResponseReasoningItem(
                 id=f"rs_{random_uuid()}",
@@ -855,17 +871,30 @@ def parse_chat_output(
     is_tool_call = False  # TODO: update this when tool call is supported
 
     # Get completed messages from the parser
+    # - analysis channel: hidden reasoning
+    # - commentary channel without recipient (preambles): visible to user
+    # - final channel: visible to user
+    # - commentary with recipient (tool calls): handled separately by tool parser
     reasoning_texts = [
         msg.content[0].text for msg in output_msgs if msg.channel == "analysis"
     ]
     final_texts = [
-        msg.content[0].text for msg in output_msgs if msg.channel != "analysis"
+        msg.content[0].text
+        for msg in output_msgs
+        if msg.channel == "final" or (msg.channel == "commentary" and not msg.recipient)
     ]
 
     # Extract partial messages from the parser
     if parser.current_channel == "analysis" and parser.current_content:
         reasoning_texts.append(parser.current_content)
-    elif parser.current_channel != "analysis" and parser.current_content:
+    elif parser.current_channel == "final" and parser.current_content:
+        final_texts.append(parser.current_content)
+    elif (
+        parser.current_channel == "commentary"
+        and not parser.current_recipient
+        and parser.current_content
+    ):
+        # Preambles (commentary without recipient) are visible to user
         final_texts.append(parser.current_content)
 
     # Flatten multiple messages into a single string
diff --git a/vllm/entrypoints/openai/responses/context.py b/vllm/entrypoints/openai/responses/context.py
index b57adeeb8..bab59e0aa 100644
--- a/vllm/entrypoints/openai/responses/context.py
+++ b/vllm/entrypoints/openai/responses/context.py
@@ -540,8 +540,12 @@ class HarmonyContext(ConversationContext):
         self.first_tok_of_message = True  # For streaming support
 
     def _update_num_reasoning_tokens(self):
-        # Count all analysis and commentary channels as reasoning tokens
-        if self.parser.current_channel in {"analysis", "commentary"}:
+        channel = self.parser.current_channel
+        if channel == "analysis":
+            self.num_reasoning_tokens += 1
+        elif channel == "commentary" and self.parser.current_recipient is not None:
+            # Tool interactions (python/browser/container) are hidden.
+            # Preambles (recipient=None) are visible user text.
             self.num_reasoning_tokens += 1
 
     def append_output(self, output: RequestOutput) -> None:
diff --git a/vllm/entrypoints/openai/responses/streaming_events.py b/vllm/entrypoints/openai/responses/streaming_events.py
index 49d2b99da..cc242e7ba 100644
--- a/vllm/entrypoints/openai/responses/streaming_events.py
+++ b/vllm/entrypoints/openai/responses/streaming_events.py
@@ -563,7 +563,9 @@ def emit_content_delta_events(
     channel = ctx.parser.current_channel
     recipient = ctx.parser.current_recipient
 
-    if channel == "final" and recipient is None:
+    if channel in ("final", "commentary") and recipient is None:
+        # Preambles (commentary with no recipient) and final messages
+        # are both user-visible text.
         return emit_text_delta_events(delta, state)
     elif channel == "analysis" and recipient is None:
         return emit_reasoning_delta_events(delta, state)
@@ -607,7 +609,9 @@ def emit_previous_item_done_events(
             return emit_mcp_completion_events(previous_item.recipient, text, state)
     elif previous_item.channel == "analysis":
         return emit_reasoning_done_events(text, state)
-    elif previous_item.channel == "final":
+    elif previous_item.channel in ("commentary", "final"):
+        # Preambles (commentary with no recipient) and final messages
+        # are both user-visible text.
         return emit_text_output_done_events(text, state)
     return []