[Misc][Harmony] Move Responses API only harmony utils to responses/harmony.py (#35339)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-02-26 01:35:16 -05:00
committed by GitHub
parent 4a9c07a0a2
commit 186ea22efe
6 changed files with 1040 additions and 990 deletions

View File

@@ -2,13 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseReasoningItem,
)
from openai.types.responses.response_output_item import McpCall
from openai_harmony import Author, Message, Role, TextContent
from openai_harmony import Message, Role
from tests.entrypoints.openai.utils import verify_harmony_messages
from vllm.entrypoints.openai.parser.harmony_utils import (
@@ -18,20 +12,21 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
has_custom_tools,
parse_chat_input_to_harmony_message,
parse_chat_output,
parse_input_to_harmony_message,
parse_output_message,
)
from vllm.entrypoints.openai.responses.harmony import (
response_previous_input_to_harmony,
)
class TestCommonParseInputToHarmonyMessage:
"""
Tests for scenarios that are common to both Chat Completion
parse_chat_input_to_harmony_message and Responsees API
parse_input_to_harmony_message functions.
parse_chat_input_to_harmony_message and Responses API
response_previous_input_to_harmony functions.
"""
@pytest.fixture(
params=[parse_chat_input_to_harmony_message, parse_input_to_harmony_message]
params=[parse_chat_input_to_harmony_message, response_previous_input_to_harmony]
)
def parse_function(self, request):
return request.param
@@ -216,81 +211,6 @@ class TestCommonParseInputToHarmonyMessage:
assert messages[0].content[1].text == "actual text"
class TestParseInputToHarmonyMessage:
    """Responses-API-only scenarios for parse_input_to_harmony_message."""

    def test_message_with_empty_content(self):
        """A user message with empty-string content yields one empty text part."""
        parsed = parse_input_to_harmony_message({"role": "user", "content": ""})

        assert len(parsed) == 1
        assert parsed[0].content[0].text == ""

    def test_tool_message_with_string_content(self):
        """A tool message with plain string content keeps that text verbatim."""
        weather_text = "The weather in San Francisco is sunny, 72°F"
        parsed = parse_input_to_harmony_message(
            {"role": "tool", "name": "get_weather", "content": weather_text}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.get_weather"
        assert tool_msg.content[0].text == weather_text
        assert tool_msg.channel == "commentary"

    def test_tool_message_with_array_content(self):
        """A tool message with list content has its text parts concatenated."""
        parts = [
            {"type": "text", "text": "Result 1: "},
            {"type": "text", "text": "Result 2: "},
            # Non-text part: expected to be left out of the joined text.
            {"type": "image", "url": "http://example.com/img.png"},
            {"type": "text", "text": "Result 3"},
        ]
        parsed = parse_input_to_harmony_message(
            {"role": "tool", "name": "search_results", "content": parts}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.search_results"
        assert tool_msg.content[0].text == "Result 1: Result 2: Result 3"

    def test_tool_message_with_empty_content(self):
        """A tool message whose content is None yields an empty text part."""
        parsed = parse_input_to_harmony_message(
            {"role": "tool", "name": "empty_tool", "content": None}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.empty_tool"
        assert tool_msg.content[0].text == ""
class TestParseChatInputToHarmonyMessage:
"""
Tests for scenarios that are specific to the Chat Completion API
@@ -888,200 +808,6 @@ class TestParseChatOutput:
assert final_content == "Let me look that up.\nThe answer is 42."
class TestParseOutputMessage:
    """Exercise parse_output_message across channels and recipients."""

    def test_commentary_with_no_recipient_creates_message(self):
        """Commentary without a recipient (a preamble) becomes a message item.

        Per Harmony format, preambles are intended to be shown to end-users,
        unlike analysis channel content which is hidden reasoning.
        See: https://cookbook.openai.com/articles/openai-harmony
        """
        preamble_text = "I will now search for the weather information."
        # No with_recipient() call: the recipient stays None, i.e. a preamble.
        preamble = Message.from_role_and_content(
            Role.ASSISTANT, preamble_text
        ).with_channel("commentary")

        output_items = parse_output_message(preamble)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert item.type == "message"
        assert item.role == "assistant"
        assert item.status == "completed"
        assert len(item.content) == 1
        assert item.content[0].type == "output_text"
        assert item.content[0].text == preamble_text

    def test_commentary_with_function_recipient_creates_function_call(self):
        """Commentary addressed to 'functions.X' becomes a function call item."""
        call_args = '{"location": "San Francisco", "units": "celsius"}'
        message = (
            Message.from_role_and_content(Role.ASSISTANT, call_args)
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        call = output_items[0]
        assert isinstance(call, ResponseFunctionToolCall)
        assert call.type == "function_call"
        assert call.name == "get_weather"
        assert call.arguments == call_args
        assert call.call_id.startswith("call_")
        assert call.id.startswith("fc_")

    def test_commentary_with_python_recipient_creates_reasoning(self):
        """Commentary addressed to 'python' becomes a reasoning item."""
        code = "import numpy as np\nprint(np.array([1, 2, 3]))"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, code)
            .with_channel("commentary")
            .with_recipient("python")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == code

    def test_commentary_with_browser_recipient_creates_reasoning(self):
        """Commentary addressed to 'browser' becomes a reasoning item."""
        text = "Navigating to the specified URL"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, text)
            .with_channel("commentary")
            .with_recipient("browser")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == text

    def test_commentary_with_container_recipient_creates_reasoning(self):
        """Commentary addressed to 'container' becomes a reasoning item."""
        text = "Running command in container"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, text)
            .with_channel("commentary")
            .with_recipient("container")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == text

    def test_commentary_with_empty_content_and_no_recipient(self):
        """Edge case: an empty preamble still produces one message item."""
        message = Message.from_role_and_content(Role.ASSISTANT, "").with_channel(
            "commentary"
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert item.content[0].text == ""

    def test_commentary_with_multiple_contents_and_no_recipient(self):
        """A preamble with several content parts stays a single message item."""
        step_texts = [
            "Step 1: Analyze the request",
            "Step 2: Prepare to call functions",
        ]
        contents = [TextContent(text=step) for step in step_texts]
        message = Message.from_role_and_contents(
            Role.ASSISTANT, contents
        ).with_channel("commentary")

        output_items = parse_output_message(message)

        # One output message is expected, carrying both content parts in order.
        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert len(item.content) == 2
        assert item.content[0].text == step_texts[0]
        assert item.content[1].text == step_texts[1]

    def test_commentary_with_multiple_function_calls(self):
        """Each content part of a function-addressed message becomes a call."""
        sf_args = '{"location": "San Francisco"}'
        ny_args = '{"location": "New York"}'
        contents = [TextContent(text=sf_args), TextContent(text=ny_args)]
        message = (
            Message.from_role_and_contents(Role.ASSISTANT, contents)
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 2
        assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items)
        first_call, second_call = output_items[0], output_items[1]
        assert first_call.name == "get_weather"
        assert second_call.name == "get_weather"
        assert first_call.arguments == sf_args
        assert second_call.arguments == ny_args

    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
        """Commentary addressed to an unrecognized recipient becomes an MCP call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("custom_tool")
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        call = output_items[0]
        assert isinstance(call, McpCall)
        assert call.type == "mcp_call"
        assert call.name == "custom_tool"
        assert call.server_label == "custom_tool"

    def test_analysis_channel_creates_reasoning(self):
        """Analysis-channel content becomes a reasoning item."""
        thought = "Analyzing the problem step by step..."
        message = Message.from_role_and_content(Role.ASSISTANT, thought).with_channel(
            "analysis"
        )

        output_items = parse_output_message(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == thought

    def test_non_assistant_message_returns_empty(self):
        """Non-assistant messages produce no output items.

        Per the implementation, tool messages to assistant (e.g., search results)
        are not included in final output to align with OpenAI behavior.
        """
        tool_author = Author.new(Role.TOOL, "functions.get_weather")
        tool_result = Message.from_author_and_content(
            tool_author, "The weather is sunny, 72°F"
        )

        output_items = parse_output_message(tool_result)

        assert len(output_items) == 0
def test_has_custom_tools() -> None:
assert not has_custom_tools(set())
assert not has_custom_tools({"web_search_preview", "code_interpreter", "container"})
@@ -1091,185 +817,6 @@ def test_has_custom_tools() -> None:
)
def test_parse_mcp_call_basic() -> None:
    """An undotted, non-builtin recipient yields a completed MCP call."""
    mcp_args = '{"path": "/tmp"}'
    message = (
        Message.from_role_and_content(Role.ASSISTANT, mcp_args)
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    output_items = parse_output_message(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    # With no dot in the recipient, name and server label are both the recipient.
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == mcp_args
    assert call.status == "completed"
def test_parse_mcp_call_dotted_recipient() -> None:
    """A 'server.tool' recipient is split into server label and tool name."""
    message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    output_items = parse_output_message(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
def test_mcp_vs_function_call() -> None:
    """A 'functions.*' recipient yields a function call, never an MCP call."""
    func_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    func_items = parse_output_message(func_message)

    assert len(func_items) == 1
    only_item = func_items[0]
    assert not isinstance(only_item, McpCall)
    assert only_item.type == "function_call"
def test_mcp_vs_builtin_tools() -> None:
    """Test that built-in tools (python, container) are not parsed as MCP calls.

    Every built-in recipient named above is exercised on the commentary
    channel; each must produce exactly one reasoning item, not an McpCall.
    """
    builtin_payloads = {
        "python": "print('hello')",
        "container": "Running command in container",
    }
    for recipient, payload in builtin_payloads.items():
        message = Message.from_role_and_content(Role.ASSISTANT, payload)
        message = message.with_recipient(recipient)
        message = message.with_channel("commentary")

        items = parse_output_message(message)

        # The recipient is attached as the assertion message so a failure
        # identifies which built-in tool regressed.
        assert len(items) == 1, recipient
        assert not isinstance(items[0], McpCall), recipient
        assert items[0].type == "reasoning", recipient
def test_parse_remaining_state_commentary_channel() -> None:
    """Check parse_remaining_state on commentary-channel parser states."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    # Case 1: a functions.* recipient yields an in-progress function call.
    func_state = Mock(
        current_content='{"arg": "value"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="functions.my_tool",
    )
    func_items = parse_remaining_state(func_state)
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: a non-builtin tool recipient yields an in-progress MCP call.
    mcp_state = Mock(
        current_content='{"path": "/tmp"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="filesystem",
    )
    mcp_items = parse_remaining_state(mcp_state)
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: the built-in python tool is an internal tool interaction, so it
    # must come back as reasoning rather than as an MCP call.
    builtin_state = Mock(
        current_content="print('hello')",
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="python",
    )
    builtin_items = parse_remaining_state(builtin_state)
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

    # Case 4: no recipient (a preamble) yields a message, not reasoning.
    preamble_text = "I'll search for that information now."
    preamble_state = Mock(
        current_content=preamble_text,
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient=None,
    )
    preamble_items = parse_remaining_state(preamble_state)
    assert len(preamble_items) == 1
    preamble_item = preamble_items[0]
    assert isinstance(preamble_item, ResponseOutputMessage)
    assert preamble_item.type == "message"
    assert preamble_item.content[0].text == preamble_text
    assert preamble_item.status == "incomplete"  # streaming
def test_parse_remaining_state_analysis_channel() -> None:
    """Check parse_remaining_state on analysis-channel parser states."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    # Case 1: a functions.* recipient yields an in-progress function call.
    func_state = Mock(
        current_content='{"arg": "value"}',
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="functions.my_tool",
    )
    func_items = parse_remaining_state(func_state)
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: a non-builtin tool recipient yields an in-progress MCP call.
    mcp_state = Mock(
        current_content='{"query": "test"}',
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="database",
    )
    mcp_items = parse_remaining_state(mcp_state)
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: the built-in container tool must not be reported as an MCP
    # call; it is expected to come back as reasoning.
    builtin_state = Mock(
        current_content="docker run",
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="container",
    )
    builtin_items = parse_remaining_state(builtin_state)
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
class TestGetSystemMessage:
"""Tests for get_system_message channel configuration."""

View File

@@ -0,0 +1,463 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for vllm.entrypoints.openai.responses.harmony."""
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseReasoningItem,
)
from openai.types.responses.response_output_item import McpCall
from openai_harmony import Author, Message, Role, TextContent
from vllm.entrypoints.openai.responses.harmony import (
harmony_to_response_output,
parser_state_to_response_output,
response_previous_input_to_harmony,
)
class TestResponsePreviousInputToHarmony:
    """Responses-API-only scenarios for response_previous_input_to_harmony."""

    def test_message_with_empty_content(self):
        """A user message with empty-string content yields one empty text part."""
        parsed = response_previous_input_to_harmony({"role": "user", "content": ""})

        assert len(parsed) == 1
        assert parsed[0].content[0].text == ""

    def test_tool_message_with_string_content(self):
        """A tool message with plain string content keeps that text verbatim."""
        weather_text = "The weather in San Francisco is sunny, 72°F"
        parsed = response_previous_input_to_harmony(
            {"role": "tool", "name": "get_weather", "content": weather_text}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.get_weather"
        assert tool_msg.content[0].text == weather_text
        assert tool_msg.channel == "commentary"

    def test_tool_message_with_array_content(self):
        """A tool message with list content has its text parts concatenated."""
        parts = [
            {"type": "text", "text": "Result 1: "},
            {"type": "text", "text": "Result 2: "},
            # Non-text part: expected to be left out of the joined text.
            {"type": "image", "url": "http://example.com/img.png"},
            {"type": "text", "text": "Result 3"},
        ]
        parsed = response_previous_input_to_harmony(
            {"role": "tool", "name": "search_results", "content": parts}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.search_results"
        assert tool_msg.content[0].text == "Result 1: Result 2: Result 3"

    def test_tool_message_with_empty_content(self):
        """A tool message whose content is None yields an empty text part."""
        parsed = response_previous_input_to_harmony(
            {"role": "tool", "name": "empty_tool", "content": None}
        )

        assert len(parsed) == 1
        tool_msg = parsed[0]
        assert tool_msg.author.role == Role.TOOL
        assert tool_msg.author.name == "functions.empty_tool"
        assert tool_msg.content[0].text == ""
class TestHarmonyToResponseOutput:
    """Exercise harmony_to_response_output across channels and recipients."""

    def test_commentary_with_no_recipient_creates_message(self):
        """Commentary without a recipient (a preamble) becomes a message item.

        Per Harmony format, preambles are intended to be shown to end-users,
        unlike analysis channel content which is hidden reasoning.
        See: https://cookbook.openai.com/articles/openai-harmony
        """
        preamble_text = "I will now search for the weather information."
        # No with_recipient() call: the recipient stays None, i.e. a preamble.
        preamble = Message.from_role_and_content(
            Role.ASSISTANT, preamble_text
        ).with_channel("commentary")

        output_items = harmony_to_response_output(preamble)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert item.type == "message"
        assert item.role == "assistant"
        assert item.status == "completed"
        assert len(item.content) == 1
        assert item.content[0].type == "output_text"
        assert item.content[0].text == preamble_text

    def test_commentary_with_function_recipient_creates_function_call(self):
        """Commentary addressed to 'functions.X' becomes a function call item."""
        call_args = '{"location": "San Francisco", "units": "celsius"}'
        message = (
            Message.from_role_and_content(Role.ASSISTANT, call_args)
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        call = output_items[0]
        assert isinstance(call, ResponseFunctionToolCall)
        assert call.type == "function_call"
        assert call.name == "get_weather"
        assert call.arguments == call_args
        assert call.call_id.startswith("call_")
        assert call.id.startswith("fc_")

    def test_commentary_with_python_recipient_creates_reasoning(self):
        """Commentary addressed to 'python' becomes a reasoning item."""
        code = "import numpy as np\nprint(np.array([1, 2, 3]))"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, code)
            .with_channel("commentary")
            .with_recipient("python")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == code

    def test_commentary_with_browser_recipient_creates_reasoning(self):
        """Commentary addressed to 'browser' becomes a reasoning item."""
        text = "Navigating to the specified URL"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, text)
            .with_channel("commentary")
            .with_recipient("browser")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == text

    def test_commentary_with_container_recipient_creates_reasoning(self):
        """Commentary addressed to 'container' becomes a reasoning item."""
        text = "Running command in container"
        message = (
            Message.from_role_and_content(Role.ASSISTANT, text)
            .with_channel("commentary")
            .with_recipient("container")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == text

    def test_commentary_with_empty_content_and_no_recipient(self):
        """Edge case: an empty preamble still produces one message item."""
        message = Message.from_role_and_content(Role.ASSISTANT, "").with_channel(
            "commentary"
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert item.content[0].text == ""

    def test_commentary_with_multiple_contents_and_no_recipient(self):
        """A preamble with several content parts stays a single message item."""
        step_texts = [
            "Step 1: Analyze the request",
            "Step 2: Prepare to call functions",
        ]
        contents = [TextContent(text=step) for step in step_texts]
        message = Message.from_role_and_contents(
            Role.ASSISTANT, contents
        ).with_channel("commentary")

        output_items = harmony_to_response_output(message)

        # One output message is expected, carrying both content parts in order.
        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseOutputMessage)
        assert len(item.content) == 2
        assert item.content[0].text == step_texts[0]
        assert item.content[1].text == step_texts[1]

    def test_commentary_with_multiple_function_calls(self):
        """Each content part of a function-addressed message becomes a call."""
        sf_args = '{"location": "San Francisco"}'
        ny_args = '{"location": "New York"}'
        contents = [TextContent(text=sf_args), TextContent(text=ny_args)]
        message = (
            Message.from_role_and_contents(Role.ASSISTANT, contents)
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 2
        assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items)
        first_call, second_call = output_items[0], output_items[1]
        assert first_call.name == "get_weather"
        assert second_call.name == "get_weather"
        assert first_call.arguments == sf_args
        assert second_call.arguments == ny_args

    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
        """Commentary addressed to an unrecognized recipient becomes an MCP call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("custom_tool")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        call = output_items[0]
        assert isinstance(call, McpCall)
        assert call.type == "mcp_call"
        assert call.name == "custom_tool"
        assert call.server_label == "custom_tool"

    def test_analysis_channel_creates_reasoning(self):
        """Analysis-channel content becomes a reasoning item."""
        thought = "Analyzing the problem step by step..."
        message = Message.from_role_and_content(Role.ASSISTANT, thought).with_channel(
            "analysis"
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseReasoningItem)
        assert item.type == "reasoning"
        assert item.content[0].text == thought

    def test_non_assistant_message_returns_empty(self):
        """Non-assistant messages produce no output items.

        Per the implementation, tool messages to assistant (e.g., search results)
        are not included in final output to align with OpenAI behavior.
        """
        tool_author = Author.new(Role.TOOL, "functions.get_weather")
        tool_result = Message.from_author_and_content(
            tool_author, "The weather is sunny, 72°F"
        )

        output_items = harmony_to_response_output(tool_result)

        assert len(output_items) == 0
def test_parse_mcp_call_basic() -> None:
    """An undotted, non-builtin recipient yields a completed MCP call."""
    mcp_args = '{"path": "/tmp"}'
    message = (
        Message.from_role_and_content(Role.ASSISTANT, mcp_args)
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    output_items = harmony_to_response_output(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    # With no dot in the recipient, name and server label are both the recipient.
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == mcp_args
    assert call.status == "completed"
def test_parse_mcp_call_dotted_recipient() -> None:
    """A 'server.tool' recipient is split into server label and tool name."""
    message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    output_items = harmony_to_response_output(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
def test_mcp_vs_function_call() -> None:
    """A 'functions.*' recipient yields a function call, never an MCP call."""
    func_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    func_items = harmony_to_response_output(func_message)

    assert len(func_items) == 1
    only_item = func_items[0]
    assert not isinstance(only_item, McpCall)
    assert only_item.type == "function_call"
def test_mcp_vs_builtin_tools() -> None:
    """Test that built-in tools (python, container) are not parsed as MCP calls.

    Every built-in recipient named above is exercised on the commentary
    channel; each must produce exactly one reasoning item, not an McpCall.
    """
    builtin_payloads = {
        "python": "print('hello')",
        "container": "Running command in container",
    }
    for recipient, payload in builtin_payloads.items():
        message = Message.from_role_and_content(Role.ASSISTANT, payload)
        message = message.with_recipient(recipient)
        message = message.with_channel("commentary")

        items = harmony_to_response_output(message)

        # The recipient is attached as the assertion message so a failure
        # identifies which built-in tool regressed.
        assert len(items) == 1, recipient
        assert not isinstance(items[0], McpCall), recipient
        assert items[0].type == "reasoning", recipient
def test_parser_state_to_response_output_commentary_channel() -> None:
    """Check parser_state_to_response_output on commentary-channel states."""
    from unittest.mock import Mock

    # Case 1: a functions.* recipient yields an in-progress function call.
    func_state = Mock(
        current_content='{"arg": "value"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="functions.my_tool",
    )
    func_items = parser_state_to_response_output(func_state)
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: a non-builtin tool recipient yields an in-progress MCP call.
    mcp_state = Mock(
        current_content='{"path": "/tmp"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="filesystem",
    )
    mcp_items = parser_state_to_response_output(mcp_state)
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: the built-in python tool is an internal tool interaction, so it
    # must come back as reasoning rather than as an MCP call.
    builtin_state = Mock(
        current_content="print('hello')",
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="python",
    )
    builtin_items = parser_state_to_response_output(builtin_state)
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

    # Case 4: no recipient (a preamble) yields a message, not reasoning.
    preamble_text = "I'll search for that information now."
    preamble_state = Mock(
        current_content=preamble_text,
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient=None,
    )
    preamble_items = parser_state_to_response_output(preamble_state)
    assert len(preamble_items) == 1
    preamble_item = preamble_items[0]
    assert isinstance(preamble_item, ResponseOutputMessage)
    assert preamble_item.type == "message"
    assert preamble_item.content[0].text == preamble_text
    assert preamble_item.status == "incomplete"  # streaming
def test_parser_state_to_response_output_analysis_channel() -> None:
    """Exercise parser_state_to_response_output on the analysis channel
    for function, MCP and built-in tool recipients."""
    from unittest.mock import Mock

    def analysis_parser(content: str, recipient: str) -> Mock:
        # Stand-in parser state: buffered assistant content on "analysis".
        state = Mock()
        state.current_content = content
        state.current_role = Role.ASSISTANT
        state.current_channel = "analysis"
        state.current_recipient = recipient
        return state

    # Case 1: functions.* recipient -> function tool call
    func_items = parser_state_to_response_output(
        analysis_parser('{"arg": "value"}', "functions.my_tool")
    )
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin recipient -> MCP call
    mcp_items = parser_state_to_response_output(
        analysis_parser('{"query": "test"}', "database")
    )
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (container) is not an MCP call; it is reported
    # as a reasoning item instead.
    builtin_items = parser_state_to_response_output(
        analysis_parser("docker run", "container")
    )
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

View File

@@ -97,16 +97,16 @@ class TestMCPToolServerUnit:
assert server.get_tool_description("test_server", allowed_tools=[]) is None
def test_builtin_tools_consistency(self):
    """MCP_BUILTIN_TOOLS must match BUILTIN_TOOL_TO_MCP_SERVER_LABEL values."""
    # Imported inside the test so the check always reads the constants as
    # exported by the module under its current (public) name.
    from vllm.entrypoints.openai.parser.harmony_utils import (
        BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
        MCP_BUILTIN_TOOLS,
    )

    assert set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) == MCP_BUILTIN_TOOLS, (
        f"MCP_BUILTIN_TOOLS {MCP_BUILTIN_TOOLS} does not match "
        f"BUILTIN_TOOL_TO_MCP_SERVER_LABEL values "
        f"{set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())}"
    )

View File

@@ -2,27 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import datetime
import json
from collections.abc import Iterable, Sequence
from typing import Literal
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputItem,
ResponseOutputMessage,
ResponseOutputText,
ResponseReasoningItem,
)
from openai.types.responses.response_function_web_search import (
ActionFind,
ActionOpenPage,
ActionSearch,
ResponseFunctionWebSearch,
)
from openai.types.responses.response_output_item import McpCall
from openai.types.responses.response_reasoning_item import (
Content as ResponseReasoningTextContent,
)
from openai.types.responses.tool import Tool
from openai_harmony import (
Author,
@@ -38,17 +20,10 @@ from openai_harmony import (
ToolDescription,
load_harmony_encoding,
)
from openai_harmony import Message as OpenAIHarmonyMessage
from openai_harmony import Role as OpenAIHarmonyRole
from vllm import envs
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionToolsParam
from vllm.entrypoints.openai.responses.protocol import (
ResponseInputOutputItem,
ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.utils import random_uuid
logger = init_logger(__name__)
@@ -64,14 +39,14 @@ _harmony_encoding = None
# they are available and requested by the user.
# Tool args are provided by MCP tool descriptions. Output
# of the tools are stringified.
# Canonical mapping from built-in tool recipient to its MCP server label.
BUILTIN_TOOL_TO_MCP_SERVER_LABEL: dict[str, str] = {
    "python": "code_interpreter",
    "browser": "web_search_preview",
    "container": "container",
}
# Backward-compatible alias for the former private name, so code that still
# refers to it keeps resolving to the same dict object.
_BUILTIN_TOOL_TO_MCP_SERVER_LABEL = BUILTIN_TOOL_TO_MCP_SERVER_LABEL

# Derive MCP_BUILTIN_TOOLS from the canonical mapping
MCP_BUILTIN_TOOLS: set[str] = set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())
def has_custom_tools(tool_types: set[str]) -> bool:
@@ -179,55 +154,6 @@ def get_user_message(content: str) -> Message:
return Message.from_role_and_content(Role.USER, content)
def parse_response_input(
    response_msg: ResponseInputOutputItem,
    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
) -> Message:
    """Convert one Responses API input/output item into a Harmony message.

    Args:
        response_msg: The item, either a dict or an object exposing
            ``model_dump()``.
        prev_responses: Earlier response items, searched newest-first for the
            function call matching a ``function_call_output`` item.

    Returns:
        The Harmony message built from the item.

    Raises:
        ValueError: If a ``function_call_output`` has no matching call in
            ``prev_responses``, or the item type is not recognised.
    """
    item = response_msg if isinstance(response_msg, dict) else response_msg.model_dump()
    # Items without a "type" key are treated as plain messages.
    item_type = item.get("type", "message")

    if item_type == "message":
        role = item["role"]
        body = item["content"]
        # Add prefix for developer messages.
        # <|start|>developer<|message|># Instructions {instructions}<|end|>
        prefix = "Instructions:\n" if role == "developer" else ""
        if isinstance(body, str):
            harmony_msg = Message.from_role_and_content(role, prefix + body)
        else:
            parts = [TextContent(text=prefix + part["text"]) for part in body]
            harmony_msg = Message.from_role_and_contents(role, parts)
        if role == "assistant":
            harmony_msg = harmony_msg.with_channel("final")
        return harmony_msg

    if item_type == "function_call_output":
        call_id = item["call_id"]
        # Most recent function call with the same call_id, if any.
        matching_call = next(
            (
                prev
                for prev in reversed(prev_responses)
                if isinstance(prev, ResponseFunctionToolCall)
                and prev.call_id == call_id
            ),
            None,
        )
        if matching_call is None:
            raise ValueError(f"No call message found for {call_id}")
        return Message.from_author_and_content(
            Author.new(Role.TOOL, f"functions.{matching_call.name}"),
            item["output"],
        )

    if item_type == "reasoning":
        reasoning_parts = item["content"]
        assert len(reasoning_parts) == 1
        reasoning_text = reasoning_parts[0]["text"]
        return Message.from_role_and_content(Role.ASSISTANT, reasoning_text)

    if item_type == "function_call":
        return (
            Message.from_role_and_content(Role.ASSISTANT, item["arguments"])
            .with_channel("commentary")
            .with_recipient(f"functions.{item['name']}")
            .with_content_type("json")
        )

    raise ValueError(f"Unknown input type: {item['type']}")
def parse_chat_inputs_to_harmony_messages(chat_msgs: list) -> list[Message]:
"""
Parse a list of messages from request.messages in the Chat Completion API to
@@ -390,139 +316,6 @@ def parse_chat_input_to_harmony_message(
return msgs
def parse_input_to_harmony_message(chat_msg) -> list[Message]:
    """Turn one entry of request.previous_input_messages into Harmony messages.

    Two input layouts are accepted:
    - OpenAI chat format: ``{"role": "..."}``
    - Harmony format: ``{"author": {"role": "..."}}``

    Non-dict inputs are first converted with
    ``model_dump(exclude_none=True)``.
    """
    if isinstance(chat_msg, dict):
        payload = chat_msg
    else:
        payload = chat_msg.model_dump(exclude_none=True)

    # A dict-valued "author" entry marks the Harmony layout.
    if isinstance(payload.get("author"), dict):
        return [_parse_harmony_format_message(payload)]
    return _parse_chat_format_message(payload)
def _parse_harmony_format_message(chat_msg: dict) -> Message:
"""Reconstruct a Message from Harmony-format dict,
preserving channel, recipient, and content_type."""
author_dict = chat_msg["author"]
role = author_dict.get("role")
name = author_dict.get("name")
raw_content = chat_msg.get("content", "")
if isinstance(raw_content, list):
# TODO: Support refusal and non-text content types.
contents = [TextContent(text=c.get("text", "")) for c in raw_content]
elif isinstance(raw_content, str):
contents = [TextContent(text=raw_content)]
else:
contents = [TextContent(text="")]
if name:
msg = Message.from_author_and_contents(Author.new(Role(role), name), contents)
else:
msg = Message.from_role_and_contents(Role(role), contents)
channel = chat_msg.get("channel")
if channel:
msg = msg.with_channel(channel)
recipient = chat_msg.get("recipient")
if recipient:
msg = msg.with_recipient(recipient)
content_type = chat_msg.get("content_type")
if content_type:
msg = msg.with_content_type(content_type)
return msg
def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
"""Parse an OpenAI chat-format dict into Harmony messages."""
role = chat_msg.get("role")
if role is None:
raise ValueError(f"Message has no 'role' key: {chat_msg}")
# Assistant message with tool calls
tool_calls = chat_msg.get("tool_calls")
if role == "assistant" and tool_calls:
msgs: list[Message] = []
for call in tool_calls:
func = call.get("function", {})
name = func.get("name", "")
arguments = func.get("arguments", "") or ""
msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
msg = msg.with_channel("commentary")
msg = msg.with_recipient(f"functions.{name}")
msg = msg.with_content_type("json")
msgs.append(msg)
return msgs
# Tool role message (tool output)
if role == "tool":
name = chat_msg.get("name", "")
if name and not name.startswith("functions."):
name = f"functions.{name}"
content = chat_msg.get("content", "") or ""
content = flatten_chat_text_content(content)
# NOTE: .with_recipient("assistant") is required on tool messages
# to match parse_chat_input_to_harmony_message behavior and ensure
# proper routing in the Harmony protocol.
msg = (
Message.from_author_and_content(Author.new(Role.TOOL, name), content)
.with_channel("commentary")
.with_recipient("assistant")
)
return [msg]
# Default: user/assistant/system messages
content = chat_msg.get("content", "")
if isinstance(content, str):
contents = [TextContent(text=content)]
else:
# TODO: Support refusal.
contents = [TextContent(text=c.get("text", "")) for c in content]
msg = Message.from_role_and_contents(role, contents)
return [msg]
def construct_harmony_previous_input_messages(
    request: ResponsesRequest,
) -> list[OpenAIHarmonyMessage]:
    """Collect the Harmony messages carried in request.previous_input_messages.

    Entries that are already Harmony messages are used as-is; anything else
    is converted with ``parse_input_to_harmony_message``. System and developer
    messages are dropped: to match OpenAI, instructions, reasoning and tools
    are always taken from the most recent Responses API request, not carried
    over from previous requests.
    """
    kept: list[OpenAIHarmonyMessage] = []
    if not request.previous_input_messages:
        return kept

    skipped_roles = (OpenAIHarmonyRole.SYSTEM, OpenAIHarmonyRole.DEVELOPER)
    for entry in request.previous_input_messages:
        # Normalise every entry to a list so one filter covers both cases.
        if isinstance(entry, OpenAIHarmonyMessage):
            candidates = [entry]
        else:
            candidates = parse_input_to_harmony_message(entry)
        kept.extend(
            candidate
            for candidate in candidates
            if candidate.author.role not in skipped_roles
        )
    return kept
def render_for_completion(messages: list[Message]) -> list[int]:
conversation = Conversation.from_messages(messages)
token_ids = get_encoding().render_conversation_for_completion(
@@ -531,313 +324,6 @@ def render_for_completion(messages: list[Message]) -> list[int]:
return token_ids
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
"""Parse browser tool calls (search, open, find) into web search items."""
if len(message.content) != 1:
raise ValueError("Invalid number of contents in browser message")
content = message.content[0]
# Parse JSON args (with retry detection)
try:
browser_call = json.loads(content.text)
except json.JSONDecodeError:
logger.warning(
"Invalid JSON in browser tool call, using error placeholder: %s",
content.text,
)
json_retry_output_message = (
f"Invalid JSON args, caught and retried: {content.text}"
)
browser_call = {
"query": json_retry_output_message,
"url": json_retry_output_message,
"pattern": json_retry_output_message,
}
# Create appropriate action based on recipient
if recipient == "browser.search":
action = ActionSearch(
query=f"cursor:{browser_call.get('query', '')}", type="search"
)
elif recipient == "browser.open":
action = ActionOpenPage(
url=f"cursor:{browser_call.get('url', '')}", type="open_page"
)
elif recipient == "browser.find":
action = ActionFind(
pattern=browser_call.get("pattern", ""),
url=f"cursor:{browser_call.get('url', '')}",
type="find",
)
else:
raise ValueError(f"Unknown browser action: {recipient}")
return ResponseFunctionWebSearch(
id=f"ws_{random_uuid()}",
action=action,
status="completed",
type="web_search_call",
)
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
"""Parse function calls into function tool call items."""
function_name = recipient.split(".")[-1]
output_items = []
for content in message.content:
random_id = random_uuid()
response_item = ResponseFunctionToolCall(
arguments=content.text,
call_id=f"call_{random_id}",
type="function_call",
name=function_name,
id=f"fc_{random_id}",
)
output_items.append(response_item)
return output_items
def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
"""Parse reasoning/analysis content into reasoning items."""
output_items = []
for content in message.content:
reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}",
summary=[],
type="reasoning",
content=[
ResponseReasoningTextContent(text=content.text, type="reasoning_text")
],
status=None,
)
output_items.append(reasoning_item)
return output_items
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Build a single completed output message from the message's text parts.

    The output message keeps the author role of the Harmony message.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
"""
Parse MCP recipient into (server_label, tool_name).
For dotted recipients like "repo_browser.list":
- server_label: "repo_browser" (namespace/server)
- tool_name: "list" (specific tool)
For simple recipients like "filesystem":
- server_label: "filesystem"
- tool_name: "filesystem"
"""
if "." in recipient:
server_label = recipient.split(".")[0]
tool_name = recipient.split(".")[-1]
else:
server_label = recipient
tool_name = recipient
return server_label, tool_name
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed MCP call item per content part of the message."""
    builtin_label = _BUILTIN_TOOL_TO_MCP_SERVER_LABEL.get(recipient)
    if builtin_label is not None:
        # Built-in tools use the mapped server label and keep the recipient
        # itself as the tool name.
        server_label, tool_name = builtin_label, recipient
    else:
        server_label, tool_name = _parse_mcp_recipient(recipient)

    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
def _parse_message_no_recipient(
message: Message,
) -> list[ResponseOutputItem]:
"""Parse a Harmony message with no recipient based on its channel."""
if message.channel == "analysis":
return _parse_reasoning(message)
if message.channel in ("commentary", "final"):
# Per Harmony format, preambles (commentary with no recipient) and
# final channel content are both intended to be shown to end-users.
# See: https://cookbook.openai.com/articles/openai-harmony
return [_parse_final_message(message)]
raise ValueError(f"Unknown channel: {message.channel}")
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
    """
    Convert a Harmony message into its list of response output items.

    Messages not authored by the assistant produce no items; assistant
    messages are routed by recipient and channel.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search result).
        # Don't include it in the final output for now. This aligns with
        # OpenAI's behavior on models like o4-mini.
        return []

    recipient = message.recipient
    if recipient is None:
        # No recipient - handle based on channel for non-tool messages
        return list(_parse_message_no_recipient(message))

    # Browser tool calls (browser.search, browser.open, browser.find)
    if recipient.startswith("browser."):
        return [_parse_browser_tool_call(message, recipient)]
    # Function calls (should only happen on commentary channel)
    if message.channel == "commentary" and recipient.startswith("functions."):
        return list(_parse_function_call(message, recipient))
    # Built-in MCP tools (python, browser, container) are reported as reasoning
    if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        return list(_parse_reasoning(message))
    # All other recipients are MCP calls
    return list(_parse_mcp_call(message, recipient))
def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
    """Turn content still buffered in the parser into response items.

    Returns an empty list when nothing is buffered, when the current role is
    not the assistant, when the recipient is a ``browser.*`` tool, or when the
    channel is not recognised. Otherwise a single in-progress/incomplete item
    is returned.
    """
    pending_text = parser.current_content
    if not pending_text or parser.current_role != Role.ASSISTANT:
        return []

    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        return []

    channel = parser.current_channel

    def reasoning_item():
        return ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[
                ResponseReasoningTextContent(text=pending_text, type="reasoning_text")
            ],
            status=None,
        )

    def incomplete_message():
        output_text = ResponseOutputText(
            text=pending_text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return ResponseOutputMessage(
            id=f"msg_{random_uuid()}",
            content=[output_text],
            role="assistant",
            # if the parser still has messages (ie if the generator got cut
            # abruptly), this should be incomplete
            status="incomplete",
            type="message",
        )

    if recipient and channel in ("commentary", "analysis"):
        if recipient.startswith("functions."):
            rid = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=pending_text,
                    call_id=f"call_{rid}",
                    type="function_call",
                    name=recipient.split(".")[-1],
                    id=f"fc_{rid}",
                    status="in_progress",
                )
            ]
        # Built-in MCP tools (python, browser, container)
        if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
            return [reasoning_item()]
        # All other recipients are MCP calls
        rid = random_uuid()
        server_label, tool_name = _parse_mcp_recipient(recipient)
        return [
            McpCall(
                arguments=pending_text,
                type="mcp_call",
                name=tool_name,
                server_label=server_label,
                id=f"mcp_{rid}",
                status="in_progress",
            )
        ]

    # Per Harmony format, preambles (commentary with no recipient) are
    # intended to be shown to end-users, unlike analysis channel content;
    # final channel content is emitted as the same kind of message.
    if channel in ("commentary", "final"):
        return [incomplete_message()]
    if channel == "analysis":
        return [reasoning_item()]
    return []
def get_stop_tokens_for_assistant_actions() -> list[int]:
    """Return the encoding's stop tokens for assistant actions."""
    encoding = get_encoding()
    return encoding.stop_tokens_for_assistant_actions()

View File

@@ -0,0 +1,552 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Harmony ↔ Responses API conversion utilities.
Handles two directions:
1. Response Input → Harmony Messages (input parsing)
2. Harmony Messages → Response Output Items (output parsing)
"""
import json
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputItem,
ResponseOutputMessage,
ResponseOutputText,
ResponseReasoningItem,
)
from openai.types.responses.response_function_web_search import (
ActionFind,
ActionOpenPage,
ActionSearch,
ResponseFunctionWebSearch,
)
from openai.types.responses.response_output_item import McpCall
from openai.types.responses.response_reasoning_item import (
Content as ResponseReasoningTextContent,
)
from openai_harmony import Author, Message, Role, StreamableParser, TextContent
from vllm.entrypoints.openai.parser.harmony_utils import (
BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
flatten_chat_text_content,
)
from vllm.entrypoints.openai.responses.protocol import (
ResponseInputOutputItem,
ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.utils import random_uuid
logger = init_logger(__name__)
# ---------------------------------------------------------------------------
# 1. Private helpers for input parsing
# ---------------------------------------------------------------------------
def _parse_harmony_format_message(chat_msg: dict) -> Message:
"""Reconstruct a Message from Harmony-format dict,
preserving channel, recipient, and content_type."""
author_dict = chat_msg["author"]
role = author_dict.get("role")
name = author_dict.get("name")
raw_content = chat_msg.get("content", "")
if isinstance(raw_content, list):
# TODO: Support refusal and non-text content types.
contents = [TextContent(text=c.get("text", "")) for c in raw_content]
elif isinstance(raw_content, str):
contents = [TextContent(text=raw_content)]
else:
contents = [TextContent(text="")]
if name:
msg = Message.from_author_and_contents(Author.new(Role(role), name), contents)
else:
msg = Message.from_role_and_contents(Role(role), contents)
channel = chat_msg.get("channel")
if channel:
msg = msg.with_channel(channel)
recipient = chat_msg.get("recipient")
if recipient:
msg = msg.with_recipient(recipient)
content_type = chat_msg.get("content_type")
if content_type:
msg = msg.with_content_type(content_type)
return msg
def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
"""Parse an OpenAI chat-format dict into Harmony messages."""
role = chat_msg.get("role")
if role is None:
raise ValueError(f"Message has no 'role' key: {chat_msg}")
# Assistant message with tool calls
tool_calls = chat_msg.get("tool_calls")
if role == "assistant" and tool_calls:
msgs: list[Message] = []
for call in tool_calls:
func = call.get("function", {})
name = func.get("name", "")
arguments = func.get("arguments", "") or ""
msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
msg = msg.with_channel("commentary")
msg = msg.with_recipient(f"functions.{name}")
msg = msg.with_content_type("json")
msgs.append(msg)
return msgs
# Tool role message (tool output)
if role == "tool":
name = chat_msg.get("name", "")
if name and not name.startswith("functions."):
name = f"functions.{name}"
content = chat_msg.get("content", "") or ""
content = flatten_chat_text_content(content)
# NOTE: .with_recipient("assistant") is required on tool messages
# to match parse_chat_input_to_harmony_message behavior and ensure
# proper routing in the Harmony protocol.
msg = (
Message.from_author_and_content(Author.new(Role.TOOL, name), content)
.with_channel("commentary")
.with_recipient("assistant")
)
return [msg]
# Default: user/assistant/system messages
content = chat_msg.get("content", "")
if isinstance(content, str):
contents = [TextContent(text=content)]
else:
# TODO: Support refusal.
contents = [TextContent(text=c.get("text", "")) for c in content]
msg = Message.from_role_and_contents(role, contents)
return [msg]
# ---------------------------------------------------------------------------
# 2. Public input parsing functions
# ---------------------------------------------------------------------------
def response_input_to_harmony(
    response_msg: ResponseInputOutputItem,
    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
) -> Message:
    """Convert a single ResponseInputOutputItem into a Harmony Message.

    Args:
        response_msg: The item, either a dict or an object exposing
            ``model_dump()``.
        prev_responses: Earlier response items, searched newest-first for the
            function call matching a ``function_call_output`` item.

    Returns:
        The Harmony message built from the item.

    Raises:
        ValueError: If a ``function_call_output`` has no matching call in
            ``prev_responses``, or the item type is not recognised.
    """
    item = response_msg if isinstance(response_msg, dict) else response_msg.model_dump()
    # Items without a "type" key are treated as plain messages.
    item_type = item.get("type", "message")

    if item_type == "message":
        role = item["role"]
        body = item["content"]
        # Add prefix for developer messages.
        # <|start|>developer<|message|># Instructions {instructions}<|end|>
        prefix = "Instructions:\n" if role == "developer" else ""
        if isinstance(body, str):
            harmony_msg = Message.from_role_and_content(role, prefix + body)
        else:
            parts = [TextContent(text=prefix + part["text"]) for part in body]
            harmony_msg = Message.from_role_and_contents(role, parts)
        if role == "assistant":
            harmony_msg = harmony_msg.with_channel("final")
        return harmony_msg

    if item_type == "function_call_output":
        call_id = item["call_id"]
        # Most recent function call with the same call_id, if any.
        matching_call = next(
            (
                prev
                for prev in reversed(prev_responses)
                if isinstance(prev, ResponseFunctionToolCall)
                and prev.call_id == call_id
            ),
            None,
        )
        if matching_call is None:
            raise ValueError(f"No call message found for {call_id}")
        return Message.from_author_and_content(
            Author.new(Role.TOOL, f"functions.{matching_call.name}"),
            item["output"],
        )

    if item_type == "reasoning":
        reasoning_parts = item["content"]
        assert len(reasoning_parts) == 1
        reasoning_text = reasoning_parts[0]["text"]
        return Message.from_role_and_content(Role.ASSISTANT, reasoning_text)

    if item_type == "function_call":
        return (
            Message.from_role_and_content(Role.ASSISTANT, item["arguments"])
            .with_channel("commentary")
            .with_recipient(f"functions.{item['name']}")
            .with_content_type("json")
        )

    raise ValueError(f"Unknown input type: {item['type']}")
def response_previous_input_to_harmony(chat_msg) -> list[Message]:
    """Turn one entry of request.previous_input_messages into Harmony messages.

    Two input layouts are accepted:
    - OpenAI chat format: ``{"role": "..."}``
    - Harmony format: ``{"author": {"role": "..."}}``

    Non-dict inputs are first converted with
    ``model_dump(exclude_none=True)``.
    """
    if isinstance(chat_msg, dict):
        payload = chat_msg
    else:
        payload = chat_msg.model_dump(exclude_none=True)

    # A dict-valued "author" entry marks the Harmony layout.
    if isinstance(payload.get("author"), dict):
        return [_parse_harmony_format_message(payload)]
    return _parse_chat_format_message(payload)
def construct_harmony_previous_input_messages(
    request: ResponsesRequest,
) -> list[Message]:
    """Build a Harmony message list from request.previous_input_messages.

    Entries that are already Message objects are used as-is; anything else is
    converted with ``response_previous_input_to_harmony``. System and
    developer messages are filtered out to match OpenAI behavior, where
    instructions are always taken from the most recent Responses API request.
    """
    kept: list[Message] = []
    if not request.previous_input_messages:
        return kept

    skipped_roles = (Role.SYSTEM, Role.DEVELOPER)
    for entry in request.previous_input_messages:
        # Normalise every entry to a list so one filter covers both cases.
        if isinstance(entry, Message):
            candidates = [entry]
        else:
            candidates = response_previous_input_to_harmony(entry)
        kept.extend(
            candidate
            for candidate in candidates
            if candidate.author.role not in skipped_roles
        )
    return kept
# ---------------------------------------------------------------------------
# 3. Private helpers for output parsing
# ---------------------------------------------------------------------------
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
"""Parse browser tool calls (search, open, find) into web search items."""
if len(message.content) != 1:
raise ValueError("Invalid number of contents in browser message")
content = message.content[0]
# Parse JSON args (with retry detection)
try:
browser_call = json.loads(content.text)
except json.JSONDecodeError:
logger.warning(
"Invalid JSON in browser tool call, using error placeholder: %s",
content.text,
)
json_retry_output_message = (
f"Invalid JSON args, caught and retried: {content.text}"
)
browser_call = {
"query": json_retry_output_message,
"url": json_retry_output_message,
"pattern": json_retry_output_message,
}
# Create appropriate action based on recipient
if recipient == "browser.search":
action = ActionSearch(
query=f"cursor:{browser_call.get('query', '')}", type="search"
)
elif recipient == "browser.open":
action = ActionOpenPage(
url=f"cursor:{browser_call.get('url', '')}", type="open_page"
)
elif recipient == "browser.find":
action = ActionFind(
pattern=browser_call.get("pattern", ""),
url=f"cursor:{browser_call.get('url', '')}",
type="find",
)
else:
raise ValueError(f"Unknown browser action: {recipient}")
return ResponseFunctionWebSearch(
id=f"ws_{random_uuid()}",
action=action,
status="completed",
type="web_search_call",
)
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
"""Parse function calls into function tool call items."""
function_name = recipient.split(".")[-1]
output_items = []
for content in message.content:
random_id = random_uuid()
response_item = ResponseFunctionToolCall(
arguments=content.text,
call_id=f"call_{random_id}",
type="function_call",
name=function_name,
id=f"fc_{random_id}",
)
output_items.append(response_item)
return output_items
def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
"""Parse reasoning/analysis content into reasoning items."""
output_items = []
for content in message.content:
reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}",
summary=[],
type="reasoning",
content=[
ResponseReasoningTextContent(text=content.text, type="reasoning_text")
],
status=None,
)
output_items.append(reasoning_item)
return output_items
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Build a single completed output message from the message's text parts.

    The output message keeps the author role of the Harmony message.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
"""Parse MCP recipient into (server_label, tool_name).
For dotted recipients like "repo_browser.list":
- server_label: "repo_browser" (namespace/server)
- tool_name: "list" (specific tool)
For simple recipients like "filesystem":
- server_label: "filesystem"
- tool_name: "filesystem"
"""
if "." in recipient:
server_label = recipient.split(".")[0]
tool_name = recipient.split(".")[-1]
else:
server_label = recipient
tool_name = recipient
return server_label, tool_name
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed MCP call item per content part of the message."""
    builtin_label = BUILTIN_TOOL_TO_MCP_SERVER_LABEL.get(recipient)
    if builtin_label is not None:
        # Built-in tools use the mapped server label and keep the recipient
        # itself as the tool name.
        server_label, tool_name = builtin_label, recipient
    else:
        server_label, tool_name = _parse_mcp_recipient(recipient)

    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
def _parse_message_no_recipient(
message: Message,
) -> list[ResponseOutputItem]:
"""Parse a Harmony message with no recipient based on its channel."""
if message.channel == "analysis":
return _parse_reasoning(message)
if message.channel in ("commentary", "final"):
# Per Harmony format, preambles (commentary with no recipient) and
# final channel content are both intended to be shown to end-users.
# See: https://cookbook.openai.com/articles/openai-harmony
return [_parse_final_message(message)]
raise ValueError(f"Unknown channel: {message.channel}")
# ---------------------------------------------------------------------------
# 4. Public output parsing functions
# ---------------------------------------------------------------------------
def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
    """Convert one Harmony message into Responses API output items.

    Acts as the top-level dispatcher: the converter is chosen from the
    message's recipient first and, when there is no recipient, from its
    channel.

    Returns:
        A new list of output items; empty for non-assistant messages.
    """
    # Messages not authored by the assistant (e.g. a search result sent from
    # a tool to the assistant) are left out of the final output for now.
    # This aligns with OpenAI's behavior on models like o4-mini.
    if message.author.role != "assistant":
        return []

    target = message.recipient

    # No recipient: a non-tool message, handled purely by channel.
    if target is None:
        return list(_parse_message_no_recipient(message))

    # Browser tool calls (browser.search, browser.open, browser.find).
    if target.startswith("browser."):
        return [_parse_browser_tool_call(message, target)]

    # Function calls (should only happen on commentary channel).
    if message.channel == "commentary" and target.startswith("functions."):
        return list(_parse_function_call(message, target))

    # Built-in MCP tools (python, browser, container) are emitted as
    # reasoning items.
    if target in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        return list(_parse_reasoning(message))

    # Every remaining recipient is treated as an MCP call.
    return list(_parse_mcp_call(message, target))
def parser_state_to_response_output(
    parser: StreamableParser,
) -> list[ResponseOutputItem]:
    """Turn the parser's unfinished trailing content into response items.

    Used when the parser still holds buffered content that never formed a
    complete message (e.g. generation was cut short).

    Returns:
        A list with at most one item:

        - nothing when there is no buffered content, the current role is not
          the assistant, or the recipient is a ``browser.*`` tool;
        - for a recipient on the ``commentary`` / ``analysis`` channel: an
          in-progress function call (``functions.*``), a reasoning item
          (built-in tool), or an in-progress MCP call (anything else);
        - otherwise by channel: an incomplete assistant message for
          ``commentary`` / ``final``, a reasoning item for ``analysis``,
          and nothing for any other channel.
    """
    buffered = parser.current_content
    if not buffered or parser.current_role != Role.ASSISTANT:
        return []

    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        return []

    channel = parser.current_channel

    def reasoning_item() -> ResponseReasoningItem:
        # Shared by built-in tool calls and plain analysis-channel text.
        reasoning_text = ResponseReasoningTextContent(
            text=buffered, type="reasoning_text"
        )
        return ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[reasoning_text],
            status=None,
        )

    def incomplete_message() -> ResponseOutputMessage:
        # The parser still holds this text (i.e. the generator got cut
        # abruptly), so the message is reported as incomplete.
        text_part = ResponseOutputText(
            text=buffered,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return ResponseOutputMessage(
            id=f"msg_{random_uuid()}",
            content=[text_part],
            role="assistant",
            status="incomplete",
            type="message",
        )

    if recipient and channel in ("commentary", "analysis"):
        if recipient.startswith("functions."):
            # A single uuid ties the call_id and the item id together.
            uid = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=buffered,
                    call_id=f"call_{uid}",
                    type="function_call",
                    name=recipient.split(".")[-1],
                    id=f"fc_{uid}",
                    status="in_progress",
                )
            ]

        # Built-in MCP tools (python, browser, container)
        if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
            return [reasoning_item()]

        # All other recipients are MCP calls
        uid = random_uuid()
        server_label, tool_name = _parse_mcp_recipient(recipient)
        return [
            McpCall(
                arguments=buffered,
                type="mcp_call",
                name=tool_name,
                server_label=server_label,
                id=f"mcp_{uid}",
                status="in_progress",
            )
        ]

    # Per Harmony format, preambles (commentary with no recipient) are
    # intended to be shown to end-users, unlike analysis channel content;
    # final-channel text produces the same kind of message item.
    if channel in ("commentary", "final"):
        return [incomplete_message()]

    if channel == "analysis":
        return [reasoning_item()]

    return []

View File

@@ -58,15 +58,11 @@ from vllm.entrypoints.openai.engine.serving import (
)
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.parser.harmony_utils import (
construct_harmony_previous_input_messages,
get_developer_message,
get_stop_tokens_for_assistant_actions,
get_system_message,
get_user_message,
has_custom_tools,
parse_output_message,
parse_remaining_state,
parse_response_input,
render_for_completion,
)
from vllm.entrypoints.openai.responses.context import (
@@ -76,6 +72,12 @@ from vllm.entrypoints.openai.responses.context import (
SimpleContext,
StreamingHarmonyContext,
)
from vllm.entrypoints.openai.responses.harmony import (
construct_harmony_previous_input_messages,
harmony_to_response_output,
parser_state_to_response_output,
response_input_to_harmony,
)
from vllm.entrypoints.openai.responses.protocol import (
InputTokensDetails,
OutputTokensDetails,
@@ -954,9 +956,9 @@ class OpenAIServingResponses(OpenAIServing):
output_items: list[ResponseOutputItem] = []
num_init_messages = context.num_init_messages
for msg in context.messages[num_init_messages:]:
output_items.extend(parse_output_message(msg))
output_items.extend(harmony_to_response_output(msg))
# Handle the generation stopped in the middle (if any).
last_items = parse_remaining_state(context.parser)
last_items = parser_state_to_response_output(context.parser)
if last_items:
output_items.extend(last_items)
return output_items
@@ -1103,13 +1105,13 @@ class OpenAIServingResponses(OpenAIServing):
else:
prev_outputs = []
for response_msg in request.input:
new_msg = parse_response_input(response_msg, prev_outputs)
new_msg = response_input_to_harmony(response_msg, prev_outputs)
if new_msg.author.role != "system":
messages.append(new_msg)
# User passes in a tool call request and its output. We need
# to add the tool call request to prev_outputs so that the
# parse_response_input can find the tool call request when
# to add the tool call request to prev_outputs so that
# response_input_to_harmony can find the tool call request when
# parsing the tool call output.
if isinstance(response_msg, ResponseFunctionToolCall):
prev_outputs.append(response_msg)