diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py index b73a0b074..7842a1fcd 100644 --- a/tests/entrypoints/openai/parser/test_harmony_utils.py +++ b/tests/entrypoints/openai/parser/test_harmony_utils.py @@ -2,13 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from openai.types.responses import ( - ResponseFunctionToolCall, - ResponseOutputMessage, - ResponseReasoningItem, -) -from openai.types.responses.response_output_item import McpCall -from openai_harmony import Author, Message, Role, TextContent +from openai_harmony import Message, Role from tests.entrypoints.openai.utils import verify_harmony_messages from vllm.entrypoints.openai.parser.harmony_utils import ( @@ -18,20 +12,21 @@ from vllm.entrypoints.openai.parser.harmony_utils import ( has_custom_tools, parse_chat_input_to_harmony_message, parse_chat_output, - parse_input_to_harmony_message, - parse_output_message, +) +from vllm.entrypoints.openai.responses.harmony import ( + response_previous_input_to_harmony, ) class TestCommonParseInputToHarmonyMessage: """ Tests for scenarios that are common to both Chat Completion - parse_chat_input_to_harmony_message and Responsees API - parse_input_to_harmony_message functions. + parse_chat_input_to_harmony_message and Responses API + response_previous_input_to_harmony functions. """ @pytest.fixture( - params=[parse_chat_input_to_harmony_message, parse_input_to_harmony_message] + params=[parse_chat_input_to_harmony_message, response_previous_input_to_harmony] ) def parse_function(self, request): return request.param @@ -216,81 +211,6 @@ class TestCommonParseInputToHarmonyMessage: assert messages[0].content[1].text == "actual text" -class TestParseInputToHarmonyMessage: - """ - Tests for scenarios that are specific to the Responses API - parse_input_to_harmony_message function. 
- """ - - def test_message_with_empty_content(self): - """Test parsing message with empty string content.""" - chat_msg = { - "role": "user", - "content": "", - } - - messages = parse_input_to_harmony_message(chat_msg) - - assert len(messages) == 1 - assert messages[0].content[0].text == "" - - def test_tool_message_with_string_content(self): - """Test parsing tool message with string content.""" - chat_msg = { - "role": "tool", - "name": "get_weather", - "content": "The weather in San Francisco is sunny, 72°F", - } - - messages = parse_input_to_harmony_message(chat_msg) - - assert len(messages) == 1 - assert messages[0].author.role == Role.TOOL - assert messages[0].author.name == "functions.get_weather" - assert ( - messages[0].content[0].text == "The weather in San Francisco is sunny, 72°F" - ) - assert messages[0].channel == "commentary" - - def test_tool_message_with_array_content(self): - """Test parsing tool message with array content.""" - chat_msg = { - "role": "tool", - "name": "search_results", - "content": [ - {"type": "text", "text": "Result 1: "}, - {"type": "text", "text": "Result 2: "}, - { - "type": "image", - "url": "http://example.com/img.png", - }, # Should be ignored - {"type": "text", "text": "Result 3"}, - ], - } - - messages = parse_input_to_harmony_message(chat_msg) - - assert len(messages) == 1 - assert messages[0].author.role == Role.TOOL - assert messages[0].author.name == "functions.search_results" - assert messages[0].content[0].text == "Result 1: Result 2: Result 3" - - def test_tool_message_with_empty_content(self): - """Test parsing tool message with None content.""" - chat_msg = { - "role": "tool", - "name": "empty_tool", - "content": None, - } - - messages = parse_input_to_harmony_message(chat_msg) - - assert len(messages) == 1 - assert messages[0].author.role == Role.TOOL - assert messages[0].author.name == "functions.empty_tool" - assert messages[0].content[0].text == "" - - class TestParseChatInputToHarmonyMessage: """ Tests for 
scenarios that are specific to the Chat Completion API @@ -888,200 +808,6 @@ class TestParseChatOutput: assert final_content == "Let me look that up.\nThe answer is 42." -class TestParseOutputMessage: - """Tests for parse_output_message function.""" - - def test_commentary_with_no_recipient_creates_message(self): - """Test that commentary with recipient=None (preambles) creates message items. - - Per Harmony format, preambles are intended to be shown to end-users, - unlike analysis channel content which is hidden reasoning. - See: https://cookbook.openai.com/articles/openai-harmony - """ - message = Message.from_role_and_content( - Role.ASSISTANT, "I will now search for the weather information." - ) - message = message.with_channel("commentary") - # recipient is None by default, representing a preamble - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseOutputMessage) - assert output_items[0].type == "message" - assert output_items[0].role == "assistant" - assert output_items[0].status == "completed" - assert len(output_items[0].content) == 1 - assert output_items[0].content[0].type == "output_text" - assert ( - output_items[0].content[0].text - == "I will now search for the weather information." 
- ) - - def test_commentary_with_function_recipient_creates_function_call(self): - """Test commentary with recipient='functions.X' creates function calls.""" - message = Message.from_role_and_content( - Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}' - ) - message = message.with_channel("commentary") - message = message.with_recipient("functions.get_weather") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseFunctionToolCall) - assert output_items[0].type == "function_call" - assert output_items[0].name == "get_weather" - assert ( - output_items[0].arguments - == '{"location": "San Francisco", "units": "celsius"}' - ) - assert output_items[0].call_id.startswith("call_") - assert output_items[0].id.startswith("fc_") - - def test_commentary_with_python_recipient_creates_reasoning(self): - """Test that commentary with recipient='python' creates reasoning items.""" - message = Message.from_role_and_content( - Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))" - ) - message = message.with_channel("commentary") - message = message.with_recipient("python") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseReasoningItem) - assert output_items[0].type == "reasoning" - assert ( - output_items[0].content[0].text - == "import numpy as np\nprint(np.array([1, 2, 3]))" - ) - - def test_commentary_with_browser_recipient_creates_reasoning(self): - """Test that commentary with recipient='browser' creates reasoning items.""" - message = Message.from_role_and_content( - Role.ASSISTANT, "Navigating to the specified URL" - ) - message = message.with_channel("commentary") - message = message.with_recipient("browser") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseReasoningItem) - assert output_items[0].type == "reasoning" 
- assert output_items[0].content[0].text == "Navigating to the specified URL" - - def test_commentary_with_container_recipient_creates_reasoning(self): - """Test that commentary with recipient='container' creates reasoning items.""" - message = Message.from_role_and_content( - Role.ASSISTANT, "Running command in container" - ) - message = message.with_channel("commentary") - message = message.with_recipient("container") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseReasoningItem) - assert output_items[0].type == "reasoning" - assert output_items[0].content[0].text == "Running command in container" - - def test_commentary_with_empty_content_and_no_recipient(self): - """Test edge case: empty commentary with recipient=None.""" - message = Message.from_role_and_content(Role.ASSISTANT, "") - message = message.with_channel("commentary") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseOutputMessage) - assert output_items[0].content[0].text == "" - - def test_commentary_with_multiple_contents_and_no_recipient(self): - """Test multiple content items in commentary with no recipient.""" - contents = [ - TextContent(text="Step 1: Analyze the request"), - TextContent(text="Step 2: Prepare to call functions"), - ] - message = Message.from_role_and_contents(Role.ASSISTANT, contents) - message = message.with_channel("commentary") - - output_items = parse_output_message(message) - - # _parse_final_message returns single ResponseOutputMessage with - # multiple contents - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseOutputMessage) - assert len(output_items[0].content) == 2 - assert output_items[0].content[0].text == "Step 1: Analyze the request" - assert output_items[0].content[1].text == "Step 2: Prepare to call functions" - - def test_commentary_with_multiple_function_calls(self): - """Test 
multiple function calls in commentary channel.""" - contents = [ - TextContent(text='{"location": "San Francisco"}'), - TextContent(text='{"location": "New York"}'), - ] - message = Message.from_role_and_contents(Role.ASSISTANT, contents) - message = message.with_channel("commentary") - message = message.with_recipient("functions.get_weather") - - output_items = parse_output_message(message) - - assert len(output_items) == 2 - assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items) - assert output_items[0].name == "get_weather" - assert output_items[1].name == "get_weather" - assert output_items[0].arguments == '{"location": "San Francisco"}' - assert output_items[1].arguments == '{"location": "New York"}' - - def test_commentary_with_unknown_recipient_creates_mcp_call(self): - """Test that commentary with unknown recipient creates MCP call.""" - message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}') - message = message.with_channel("commentary") - message = message.with_recipient("custom_tool") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], McpCall) - assert output_items[0].type == "mcp_call" - assert output_items[0].name == "custom_tool" - assert output_items[0].server_label == "custom_tool" - - def test_analysis_channel_creates_reasoning(self): - """Test that analysis channel creates reasoning items.""" - message = Message.from_role_and_content( - Role.ASSISTANT, "Analyzing the problem step by step..." - ) - message = message.with_channel("analysis") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], ResponseReasoningItem) - assert output_items[0].type == "reasoning" - assert ( - output_items[0].content[0].text == "Analyzing the problem step by step..." - ) - - def test_non_assistant_message_returns_empty(self): - """Test that non-assistant messages return empty list. 
- - Per the implementation, tool messages to assistant (e.g., search results) - are not included in final output to align with OpenAI behavior. - """ - message = Message.from_author_and_content( - Author.new(Role.TOOL, "functions.get_weather"), - "The weather is sunny, 72°F", - ) - - output_items = parse_output_message(message) - - assert len(output_items) == 0 - - def test_has_custom_tools() -> None: assert not has_custom_tools(set()) assert not has_custom_tools({"web_search_preview", "code_interpreter", "container"}) @@ -1091,185 +817,6 @@ def test_has_custom_tools() -> None: ) -def test_parse_mcp_call_basic() -> None: - """Test that MCP calls are parsed with correct type and server_label.""" - message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}') - message = message.with_recipient("filesystem") - message = message.with_channel("commentary") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], McpCall) - assert output_items[0].type == "mcp_call" - assert output_items[0].name == "filesystem" - assert output_items[0].server_label == "filesystem" - assert output_items[0].arguments == '{"path": "/tmp"}' - assert output_items[0].status == "completed" - - -def test_parse_mcp_call_dotted_recipient() -> None: - """Test that dotted recipients extract the tool name correctly.""" - message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}') - message = message.with_recipient("repo_browser.list") - message = message.with_channel("commentary") - - output_items = parse_output_message(message) - - assert len(output_items) == 1 - assert isinstance(output_items[0], McpCall) - assert output_items[0].name == "list" - assert output_items[0].server_label == "repo_browser" - - -def test_mcp_vs_function_call() -> None: - """Test that function calls are not parsed as MCP calls.""" - func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}') - func_message = 
func_message.with_recipient("functions.my_tool") - func_message = func_message.with_channel("commentary") - - func_items = parse_output_message(func_message) - - assert len(func_items) == 1 - assert not isinstance(func_items[0], McpCall) - assert func_items[0].type == "function_call" - - -def test_mcp_vs_builtin_tools() -> None: - """Test that built-in tools (python, container) are not parsed as MCP calls.""" - # Test python (built-in tool) - should be reasoning, not MCP - python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')") - python_message = python_message.with_recipient("python") - python_message = python_message.with_channel("commentary") - - python_items = parse_output_message(python_message) - - assert len(python_items) == 1 - assert not isinstance(python_items[0], McpCall) - assert python_items[0].type == "reasoning" - - -def test_parse_remaining_state_commentary_channel() -> None: - """Test parse_remaining_state with commentary channel and various recipients.""" - from unittest.mock import Mock - - from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state - - # Test 1: functions.* recipient → should return function tool call - parser_func = Mock() - parser_func.current_content = '{"arg": "value"}' - parser_func.current_role = Role.ASSISTANT - parser_func.current_channel = "commentary" - parser_func.current_recipient = "functions.my_tool" - - func_items = parse_remaining_state(parser_func) - - assert len(func_items) == 1 - assert not isinstance(func_items[0], McpCall) - assert func_items[0].type == "function_call" - assert func_items[0].name == "my_tool" - assert func_items[0].status == "in_progress" - - # Test 2: MCP tool (not builtin) → should return MCP call - parser_mcp = Mock() - parser_mcp.current_content = '{"path": "/tmp"}' - parser_mcp.current_role = Role.ASSISTANT - parser_mcp.current_channel = "commentary" - parser_mcp.current_recipient = "filesystem" - - mcp_items = 
parse_remaining_state(parser_mcp) - - assert len(mcp_items) == 1 - assert isinstance(mcp_items[0], McpCall) - assert mcp_items[0].type == "mcp_call" - assert mcp_items[0].name == "filesystem" - assert mcp_items[0].server_label == "filesystem" - assert mcp_items[0].status == "in_progress" - - # Test 3: Built-in tool (python) - # should NOT return MCP call, returns reasoning (internal tool interaction) - parser_builtin = Mock() - parser_builtin.current_content = "print('hello')" - parser_builtin.current_role = Role.ASSISTANT - parser_builtin.current_channel = "commentary" - parser_builtin.current_recipient = "python" - - builtin_items = parse_remaining_state(parser_builtin) - - # Built-in tools explicitly return reasoning - assert len(builtin_items) == 1 - assert not isinstance(builtin_items[0], McpCall) - assert builtin_items[0].type == "reasoning" - - # Test 4: No recipient (preamble) → should return message, not reasoning - parser_preamble = Mock() - parser_preamble.current_content = "I'll search for that information now." - parser_preamble.current_role = Role.ASSISTANT - parser_preamble.current_channel = "commentary" - parser_preamble.current_recipient = None - - preamble_items = parse_remaining_state(parser_preamble) - - assert len(preamble_items) == 1 - assert isinstance(preamble_items[0], ResponseOutputMessage) - assert preamble_items[0].type == "message" - assert preamble_items[0].content[0].text == "I'll search for that information now." 
- assert preamble_items[0].status == "incomplete" # streaming - - -def test_parse_remaining_state_analysis_channel() -> None: - """Test parse_remaining_state with analysis channel and various recipients.""" - from unittest.mock import Mock - - from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state - - # Test 1: functions.* recipient → should return function tool call - parser_func = Mock() - parser_func.current_content = '{"arg": "value"}' - parser_func.current_role = Role.ASSISTANT - parser_func.current_channel = "analysis" - parser_func.current_recipient = "functions.my_tool" - - func_items = parse_remaining_state(parser_func) - - assert len(func_items) == 1 - assert not isinstance(func_items[0], McpCall) - assert func_items[0].type == "function_call" - assert func_items[0].name == "my_tool" - assert func_items[0].status == "in_progress" - - # Test 2: MCP tool (not builtin) → should return MCP call - parser_mcp = Mock() - parser_mcp.current_content = '{"query": "test"}' - parser_mcp.current_role = Role.ASSISTANT - parser_mcp.current_channel = "analysis" - parser_mcp.current_recipient = "database" - - mcp_items = parse_remaining_state(parser_mcp) - - assert len(mcp_items) == 1 - assert isinstance(mcp_items[0], McpCall) - assert mcp_items[0].type == "mcp_call" - assert mcp_items[0].name == "database" - assert mcp_items[0].server_label == "database" - assert mcp_items[0].status == "in_progress" - - # Test 3: Built-in tool (container) - # should NOT return MCP call, falls through to reasoning - parser_builtin = Mock() - parser_builtin.current_content = "docker run" - parser_builtin.current_role = Role.ASSISTANT - parser_builtin.current_channel = "analysis" - parser_builtin.current_recipient = "container" - - builtin_items = parse_remaining_state(parser_builtin) - - # Should fall through to reasoning logic - assert len(builtin_items) == 1 - assert not isinstance(builtin_items[0], McpCall) - assert builtin_items[0].type == "reasoning" - - class 
TestGetSystemMessage: """Tests for get_system_message channel configuration.""" diff --git a/tests/entrypoints/openai/responses/test_harmony_utils.py b/tests/entrypoints/openai/responses/test_harmony_utils.py new file mode 100644 index 000000000..e51538298 --- /dev/null +++ b/tests/entrypoints/openai/responses/test_harmony_utils.py @@ -0,0 +1,463 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for vllm.entrypoints.openai.responses.harmony.""" + +from openai.types.responses import ( + ResponseFunctionToolCall, + ResponseOutputMessage, + ResponseReasoningItem, +) +from openai.types.responses.response_output_item import McpCall +from openai_harmony import Author, Message, Role, TextContent + +from vllm.entrypoints.openai.responses.harmony import ( + harmony_to_response_output, + parser_state_to_response_output, + response_previous_input_to_harmony, +) + + +class TestResponsePreviousInputToHarmony: + """ + Tests for scenarios that are specific to the Responses API + response_previous_input_to_harmony function. 
+ """ + + def test_message_with_empty_content(self): + """Test parsing message with empty string content.""" + chat_msg = { + "role": "user", + "content": "", + } + + messages = response_previous_input_to_harmony(chat_msg) + + assert len(messages) == 1 + assert messages[0].content[0].text == "" + + def test_tool_message_with_string_content(self): + """Test parsing tool message with string content.""" + chat_msg = { + "role": "tool", + "name": "get_weather", + "content": "The weather in San Francisco is sunny, 72°F", + } + + messages = response_previous_input_to_harmony(chat_msg) + + assert len(messages) == 1 + assert messages[0].author.role == Role.TOOL + assert messages[0].author.name == "functions.get_weather" + assert ( + messages[0].content[0].text == "The weather in San Francisco is sunny, 72°F" + ) + assert messages[0].channel == "commentary" + + def test_tool_message_with_array_content(self): + """Test parsing tool message with array content.""" + chat_msg = { + "role": "tool", + "name": "search_results", + "content": [ + {"type": "text", "text": "Result 1: "}, + {"type": "text", "text": "Result 2: "}, + { + "type": "image", + "url": "http://example.com/img.png", + }, # Should be ignored + {"type": "text", "text": "Result 3"}, + ], + } + + messages = response_previous_input_to_harmony(chat_msg) + + assert len(messages) == 1 + assert messages[0].author.role == Role.TOOL + assert messages[0].author.name == "functions.search_results" + assert messages[0].content[0].text == "Result 1: Result 2: Result 3" + + def test_tool_message_with_empty_content(self): + """Test parsing tool message with None content.""" + chat_msg = { + "role": "tool", + "name": "empty_tool", + "content": None, + } + + messages = response_previous_input_to_harmony(chat_msg) + + assert len(messages) == 1 + assert messages[0].author.role == Role.TOOL + assert messages[0].author.name == "functions.empty_tool" + assert messages[0].content[0].text == "" + + +class TestHarmonyToResponseOutput: + 
"""Tests for harmony_to_response_output function.""" + + def test_commentary_with_no_recipient_creates_message(self): + """Test that commentary with recipient=None (preambles) creates message items. + + Per Harmony format, preambles are intended to be shown to end-users, + unlike analysis channel content which is hidden reasoning. + See: https://cookbook.openai.com/articles/openai-harmony + """ + message = Message.from_role_and_content( + Role.ASSISTANT, "I will now search for the weather information." + ) + message = message.with_channel("commentary") + # recipient is None by default, representing a preamble + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseOutputMessage) + assert output_items[0].type == "message" + assert output_items[0].role == "assistant" + assert output_items[0].status == "completed" + assert len(output_items[0].content) == 1 + assert output_items[0].content[0].type == "output_text" + assert ( + output_items[0].content[0].text + == "I will now search for the weather information." 
+ ) + + def test_commentary_with_function_recipient_creates_function_call(self): + """Test commentary with recipient='functions.X' creates function calls.""" + message = Message.from_role_and_content( + Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}' + ) + message = message.with_channel("commentary") + message = message.with_recipient("functions.get_weather") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseFunctionToolCall) + assert output_items[0].type == "function_call" + assert output_items[0].name == "get_weather" + assert ( + output_items[0].arguments + == '{"location": "San Francisco", "units": "celsius"}' + ) + assert output_items[0].call_id.startswith("call_") + assert output_items[0].id.startswith("fc_") + + def test_commentary_with_python_recipient_creates_reasoning(self): + """Test that commentary with recipient='python' creates reasoning items.""" + message = Message.from_role_and_content( + Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))" + ) + message = message.with_channel("commentary") + message = message.with_recipient("python") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseReasoningItem) + assert output_items[0].type == "reasoning" + assert ( + output_items[0].content[0].text + == "import numpy as np\nprint(np.array([1, 2, 3]))" + ) + + def test_commentary_with_browser_recipient_creates_reasoning(self): + """Test that commentary with recipient='browser' creates reasoning items.""" + message = Message.from_role_and_content( + Role.ASSISTANT, "Navigating to the specified URL" + ) + message = message.with_channel("commentary") + message = message.with_recipient("browser") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseReasoningItem) + assert 
output_items[0].type == "reasoning" + assert output_items[0].content[0].text == "Navigating to the specified URL" + + def test_commentary_with_container_recipient_creates_reasoning(self): + """Test that commentary with recipient='container' creates reasoning items.""" + message = Message.from_role_and_content( + Role.ASSISTANT, "Running command in container" + ) + message = message.with_channel("commentary") + message = message.with_recipient("container") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseReasoningItem) + assert output_items[0].type == "reasoning" + assert output_items[0].content[0].text == "Running command in container" + + def test_commentary_with_empty_content_and_no_recipient(self): + """Test edge case: empty commentary with recipient=None.""" + message = Message.from_role_and_content(Role.ASSISTANT, "") + message = message.with_channel("commentary") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseOutputMessage) + assert output_items[0].content[0].text == "" + + def test_commentary_with_multiple_contents_and_no_recipient(self): + """Test multiple content items in commentary with no recipient.""" + contents = [ + TextContent(text="Step 1: Analyze the request"), + TextContent(text="Step 2: Prepare to call functions"), + ] + message = Message.from_role_and_contents(Role.ASSISTANT, contents) + message = message.with_channel("commentary") + + output_items = harmony_to_response_output(message) + + # _parse_final_message returns single ResponseOutputMessage with + # multiple contents + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseOutputMessage) + assert len(output_items[0].content) == 2 + assert output_items[0].content[0].text == "Step 1: Analyze the request" + assert output_items[0].content[1].text == "Step 2: Prepare to call functions" + + def 
test_commentary_with_multiple_function_calls(self): + """Test multiple function calls in commentary channel.""" + contents = [ + TextContent(text='{"location": "San Francisco"}'), + TextContent(text='{"location": "New York"}'), + ] + message = Message.from_role_and_contents(Role.ASSISTANT, contents) + message = message.with_channel("commentary") + message = message.with_recipient("functions.get_weather") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 2 + assert all(isinstance(item, ResponseFunctionToolCall) for item in output_items) + assert output_items[0].name == "get_weather" + assert output_items[1].name == "get_weather" + assert output_items[0].arguments == '{"location": "San Francisco"}' + assert output_items[1].arguments == '{"location": "New York"}' + + def test_commentary_with_unknown_recipient_creates_mcp_call(self): + """Test that commentary with unknown recipient creates MCP call.""" + message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}') + message = message.with_channel("commentary") + message = message.with_recipient("custom_tool") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], McpCall) + assert output_items[0].type == "mcp_call" + assert output_items[0].name == "custom_tool" + assert output_items[0].server_label == "custom_tool" + + def test_analysis_channel_creates_reasoning(self): + """Test that analysis channel creates reasoning items.""" + message = Message.from_role_and_content( + Role.ASSISTANT, "Analyzing the problem step by step..." + ) + message = message.with_channel("analysis") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], ResponseReasoningItem) + assert output_items[0].type == "reasoning" + assert ( + output_items[0].content[0].text == "Analyzing the problem step by step..." 
+ ) + + def test_non_assistant_message_returns_empty(self): + """Test that non-assistant messages return empty list. + + Per the implementation, tool messages to assistant (e.g., search results) + are not included in final output to align with OpenAI behavior. + """ + message = Message.from_author_and_content( + Author.new(Role.TOOL, "functions.get_weather"), + "The weather is sunny, 72°F", + ) + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 0 + + +def test_parse_mcp_call_basic() -> None: + """Test that MCP calls are parsed with correct type and server_label.""" + message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}') + message = message.with_recipient("filesystem") + message = message.with_channel("commentary") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], McpCall) + assert output_items[0].type == "mcp_call" + assert output_items[0].name == "filesystem" + assert output_items[0].server_label == "filesystem" + assert output_items[0].arguments == '{"path": "/tmp"}' + assert output_items[0].status == "completed" + + +def test_parse_mcp_call_dotted_recipient() -> None: + """Test that dotted recipients extract the tool name correctly.""" + message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}') + message = message.with_recipient("repo_browser.list") + message = message.with_channel("commentary") + + output_items = harmony_to_response_output(message) + + assert len(output_items) == 1 + assert isinstance(output_items[0], McpCall) + assert output_items[0].name == "list" + assert output_items[0].server_label == "repo_browser" + + +def test_mcp_vs_function_call() -> None: + """Test that function calls are not parsed as MCP calls.""" + func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}') + func_message = func_message.with_recipient("functions.my_tool") + func_message = 
func_message.with_channel("commentary") + + func_items = harmony_to_response_output(func_message) + + assert len(func_items) == 1 + assert not isinstance(func_items[0], McpCall) + assert func_items[0].type == "function_call" + + +def test_mcp_vs_builtin_tools() -> None: + """Test that built-in tools (python, container) are not parsed as MCP calls.""" + # Test python (built-in tool) - should be reasoning, not MCP + python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')") + python_message = python_message.with_recipient("python") + python_message = python_message.with_channel("commentary") + + python_items = harmony_to_response_output(python_message) + + assert len(python_items) == 1 + assert not isinstance(python_items[0], McpCall) + assert python_items[0].type == "reasoning" + + +def test_parser_state_to_response_output_commentary_channel() -> None: + """Test parser_state_to_response_output with commentary + channel and various recipients.""" + from unittest.mock import Mock + + # Test 1: functions.* recipient -> should return function tool call + parser_func = Mock() + parser_func.current_content = '{"arg": "value"}' + parser_func.current_role = Role.ASSISTANT + parser_func.current_channel = "commentary" + parser_func.current_recipient = "functions.my_tool" + + func_items = parser_state_to_response_output(parser_func) + + assert len(func_items) == 1 + assert not isinstance(func_items[0], McpCall) + assert func_items[0].type == "function_call" + assert func_items[0].name == "my_tool" + assert func_items[0].status == "in_progress" + + # Test 2: MCP tool (not builtin) -> should return MCP call + parser_mcp = Mock() + parser_mcp.current_content = '{"path": "/tmp"}' + parser_mcp.current_role = Role.ASSISTANT + parser_mcp.current_channel = "commentary" + parser_mcp.current_recipient = "filesystem" + + mcp_items = parser_state_to_response_output(parser_mcp) + + assert len(mcp_items) == 1 + assert isinstance(mcp_items[0], McpCall) + assert 
mcp_items[0].type == "mcp_call" + assert mcp_items[0].name == "filesystem" + assert mcp_items[0].server_label == "filesystem" + assert mcp_items[0].status == "in_progress" + + # Test 3: Built-in tool (python) + # should NOT return MCP call, returns reasoning (internal tool interaction) + parser_builtin = Mock() + parser_builtin.current_content = "print('hello')" + parser_builtin.current_role = Role.ASSISTANT + parser_builtin.current_channel = "commentary" + parser_builtin.current_recipient = "python" + + builtin_items = parser_state_to_response_output(parser_builtin) + + # Built-in tools explicitly return reasoning + assert len(builtin_items) == 1 + assert not isinstance(builtin_items[0], McpCall) + assert builtin_items[0].type == "reasoning" + + # Test 4: No recipient (preamble) → should return message, not reasoning + parser_preamble = Mock() + parser_preamble.current_content = "I'll search for that information now." + parser_preamble.current_role = Role.ASSISTANT + parser_preamble.current_channel = "commentary" + parser_preamble.current_recipient = None + + preamble_items = parser_state_to_response_output(parser_preamble) + + assert len(preamble_items) == 1 + assert isinstance(preamble_items[0], ResponseOutputMessage) + assert preamble_items[0].type == "message" + assert preamble_items[0].content[0].text == "I'll search for that information now." 
+ assert preamble_items[0].status == "incomplete" # streaming + + +def test_parser_state_to_response_output_analysis_channel() -> None: + """Test parser_state_to_response_output with analysis + channel and various recipients.""" + from unittest.mock import Mock + + # Test 1: functions.* recipient -> should return function tool call + parser_func = Mock() + parser_func.current_content = '{"arg": "value"}' + parser_func.current_role = Role.ASSISTANT + parser_func.current_channel = "analysis" + parser_func.current_recipient = "functions.my_tool" + + func_items = parser_state_to_response_output(parser_func) + + assert len(func_items) == 1 + assert not isinstance(func_items[0], McpCall) + assert func_items[0].type == "function_call" + assert func_items[0].name == "my_tool" + assert func_items[0].status == "in_progress" + + # Test 2: MCP tool (not builtin) -> should return MCP call + parser_mcp = Mock() + parser_mcp.current_content = '{"query": "test"}' + parser_mcp.current_role = Role.ASSISTANT + parser_mcp.current_channel = "analysis" + parser_mcp.current_recipient = "database" + + mcp_items = parser_state_to_response_output(parser_mcp) + + assert len(mcp_items) == 1 + assert isinstance(mcp_items[0], McpCall) + assert mcp_items[0].type == "mcp_call" + assert mcp_items[0].name == "database" + assert mcp_items[0].server_label == "database" + assert mcp_items[0].status == "in_progress" + + # Test 3: Built-in tool (container) + # should NOT return MCP call, falls through to reasoning + parser_builtin = Mock() + parser_builtin.current_content = "docker run" + parser_builtin.current_role = Role.ASSISTANT + parser_builtin.current_channel = "analysis" + parser_builtin.current_recipient = "container" + + builtin_items = parser_state_to_response_output(parser_builtin) + + # Should fall through to reasoning logic + assert len(builtin_items) == 1 + assert not isinstance(builtin_items[0], McpCall) + assert builtin_items[0].type == "reasoning" diff --git 
a/tests/entrypoints/openai/responses/test_mcp_tools.py b/tests/entrypoints/openai/responses/test_mcp_tools.py index 310af4308..55445f188 100644 --- a/tests/entrypoints/openai/responses/test_mcp_tools.py +++ b/tests/entrypoints/openai/responses/test_mcp_tools.py @@ -97,16 +97,16 @@ class TestMCPToolServerUnit: assert server.get_tool_description("test_server", allowed_tools=[]) is None def test_builtin_tools_consistency(self): - """MCP_BUILTIN_TOOLS must match _BUILTIN_TOOL_TO_MCP_SERVER_LABEL values.""" + """MCP_BUILTIN_TOOLS must match BUILTIN_TOOL_TO_MCP_SERVER_LABEL values.""" from vllm.entrypoints.openai.parser.harmony_utils import ( - _BUILTIN_TOOL_TO_MCP_SERVER_LABEL, + BUILTIN_TOOL_TO_MCP_SERVER_LABEL, MCP_BUILTIN_TOOLS, ) - assert set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) == MCP_BUILTIN_TOOLS, ( + assert set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) == MCP_BUILTIN_TOOLS, ( f"MCP_BUILTIN_TOOLS {MCP_BUILTIN_TOOLS} does not match " - f"_BUILTIN_TOOL_TO_MCP_SERVER_LABEL values " - f"{set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())}" + f"BUILTIN_TOOL_TO_MCP_SERVER_LABEL values " + f"{set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values())}" ) diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index 9dfd5f518..9b4264456 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -2,27 +2,9 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import datetime -import json from collections.abc import Iterable, Sequence from typing import Literal -from openai.types.responses import ( - ResponseFunctionToolCall, - ResponseOutputItem, - ResponseOutputMessage, - ResponseOutputText, - ResponseReasoningItem, -) -from openai.types.responses.response_function_web_search import ( - ActionFind, - ActionOpenPage, - ActionSearch, - ResponseFunctionWebSearch, -) -from openai.types.responses.response_output_item import McpCall -from 
openai.types.responses.response_reasoning_item import ( - Content as ResponseReasoningTextContent, -) from openai.types.responses.tool import Tool from openai_harmony import ( Author, @@ -38,17 +20,10 @@ from openai_harmony import ( ToolDescription, load_harmony_encoding, ) -from openai_harmony import Message as OpenAIHarmonyMessage -from openai_harmony import Role as OpenAIHarmonyRole from vllm import envs from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionToolsParam -from vllm.entrypoints.openai.responses.protocol import ( - ResponseInputOutputItem, - ResponsesRequest, -) from vllm.logger import init_logger -from vllm.utils import random_uuid logger = init_logger(__name__) @@ -64,14 +39,14 @@ _harmony_encoding = None # they are available and requested by the user. # Tool args are provided by MCP tool descriptions. Output # of the tools are stringified. -_BUILTIN_TOOL_TO_MCP_SERVER_LABEL: dict[str, str] = { +BUILTIN_TOOL_TO_MCP_SERVER_LABEL: dict[str, str] = { "python": "code_interpreter", "browser": "web_search_preview", "container": "container", } # Derive MCP_BUILTIN_TOOLS from the canonical mapping -MCP_BUILTIN_TOOLS: set[str] = set(_BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) +MCP_BUILTIN_TOOLS: set[str] = set(BUILTIN_TOOL_TO_MCP_SERVER_LABEL.values()) def has_custom_tools(tool_types: set[str]) -> bool: @@ -179,55 +154,6 @@ def get_user_message(content: str) -> Message: return Message.from_role_and_content(Role.USER, content) -def parse_response_input( - response_msg: ResponseInputOutputItem, - prev_responses: list[ResponseOutputItem | ResponseReasoningItem], -) -> Message: - if not isinstance(response_msg, dict): - response_msg = response_msg.model_dump() - if "type" not in response_msg or response_msg["type"] == "message": - role = response_msg["role"] - content = response_msg["content"] - # Add prefix for developer messages. 
- # <|start|>developer<|message|># Instructions {instructions}<|end|> - text_prefix = "Instructions:\n" if role == "developer" else "" - if isinstance(content, str): - msg = Message.from_role_and_content(role, text_prefix + content) - else: - contents = [TextContent(text=text_prefix + c["text"]) for c in content] - msg = Message.from_role_and_contents(role, contents) - if role == "assistant": - msg = msg.with_channel("final") - elif response_msg["type"] == "function_call_output": - call_id = response_msg["call_id"] - call_response: ResponseFunctionToolCall | None = None - for prev_response in reversed(prev_responses): - if ( - isinstance(prev_response, ResponseFunctionToolCall) - and prev_response.call_id == call_id - ): - call_response = prev_response - break - if call_response is None: - raise ValueError(f"No call message found for {call_id}") - msg = Message.from_author_and_content( - Author.new(Role.TOOL, f"functions.{call_response.name}"), - response_msg["output"], - ) - elif response_msg["type"] == "reasoning": - content = response_msg["content"] - assert len(content) == 1 - msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"]) - elif response_msg["type"] == "function_call": - msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"]) - msg = msg.with_channel("commentary") - msg = msg.with_recipient(f"functions.{response_msg['name']}") - msg = msg.with_content_type("json") - else: - raise ValueError(f"Unknown input type: {response_msg['type']}") - return msg - - def parse_chat_inputs_to_harmony_messages(chat_msgs: list) -> list[Message]: """ Parse a list of messages from request.messages in the Chat Completion API to @@ -390,139 +316,6 @@ def parse_chat_input_to_harmony_message( return msgs -def parse_input_to_harmony_message(chat_msg) -> list[Message]: - """Parse a message from request.previous_input_messages - into Harmony messages. 
- - Supports both OpenAI chat format ({"role": "..."}) and - Harmony format ({"author": {"role": "..."}}). - """ - if not isinstance(chat_msg, dict): - chat_msg = chat_msg.model_dump(exclude_none=True) - - if "author" in chat_msg and isinstance(chat_msg.get("author"), dict): - return [_parse_harmony_format_message(chat_msg)] - - return _parse_chat_format_message(chat_msg) - - -def _parse_harmony_format_message(chat_msg: dict) -> Message: - """Reconstruct a Message from Harmony-format dict, - preserving channel, recipient, and content_type.""" - author_dict = chat_msg["author"] - role = author_dict.get("role") - name = author_dict.get("name") - - raw_content = chat_msg.get("content", "") - if isinstance(raw_content, list): - # TODO: Support refusal and non-text content types. - contents = [TextContent(text=c.get("text", "")) for c in raw_content] - elif isinstance(raw_content, str): - contents = [TextContent(text=raw_content)] - else: - contents = [TextContent(text="")] - - if name: - msg = Message.from_author_and_contents(Author.new(Role(role), name), contents) - else: - msg = Message.from_role_and_contents(Role(role), contents) - - channel = chat_msg.get("channel") - if channel: - msg = msg.with_channel(channel) - recipient = chat_msg.get("recipient") - if recipient: - msg = msg.with_recipient(recipient) - content_type = chat_msg.get("content_type") - if content_type: - msg = msg.with_content_type(content_type) - - return msg - - -def _parse_chat_format_message(chat_msg: dict) -> list[Message]: - """Parse an OpenAI chat-format dict into Harmony messages.""" - role = chat_msg.get("role") - if role is None: - raise ValueError(f"Message has no 'role' key: {chat_msg}") - - # Assistant message with tool calls - tool_calls = chat_msg.get("tool_calls") - if role == "assistant" and tool_calls: - msgs: list[Message] = [] - for call in tool_calls: - func = call.get("function", {}) - name = func.get("name", "") - arguments = func.get("arguments", "") or "" - msg = 
Message.from_role_and_content(Role.ASSISTANT, arguments) - msg = msg.with_channel("commentary") - msg = msg.with_recipient(f"functions.{name}") - msg = msg.with_content_type("json") - msgs.append(msg) - return msgs - - # Tool role message (tool output) - if role == "tool": - name = chat_msg.get("name", "") - if name and not name.startswith("functions."): - name = f"functions.{name}" - content = chat_msg.get("content", "") or "" - content = flatten_chat_text_content(content) - # NOTE: .with_recipient("assistant") is required on tool messages - # to match parse_chat_input_to_harmony_message behavior and ensure - # proper routing in the Harmony protocol. - msg = ( - Message.from_author_and_content(Author.new(Role.TOOL, name), content) - .with_channel("commentary") - .with_recipient("assistant") - ) - return [msg] - - # Default: user/assistant/system messages - content = chat_msg.get("content", "") - if isinstance(content, str): - contents = [TextContent(text=content)] - else: - # TODO: Support refusal. 
- contents = [TextContent(text=c.get("text", "")) for c in content] - msg = Message.from_role_and_contents(role, contents) - return [msg] - - -def construct_harmony_previous_input_messages( - request: ResponsesRequest, -) -> list[OpenAIHarmonyMessage]: - messages: list[OpenAIHarmonyMessage] = [] - if request.previous_input_messages: - for message in request.previous_input_messages: - # Handle both OpenAIHarmonyMessage objects and dictionary inputs - if isinstance(message, OpenAIHarmonyMessage): - message_role = message.author.role - # To match OpenAI, instructions, reasoning and tools are - # always taken from the most recent Responses API request - # not carried over from previous requests - if ( - message_role == OpenAIHarmonyRole.SYSTEM - or message_role == OpenAIHarmonyRole.DEVELOPER - ): - continue - messages.append(message) - else: - harmony_messages = parse_input_to_harmony_message(message) - for harmony_msg in harmony_messages: - message_role = harmony_msg.author.role - # To match OpenAI, instructions, reasoning and tools are - # always taken from the most recent Responses API request - # not carried over from previous requests - if ( - message_role == OpenAIHarmonyRole.SYSTEM - or message_role == OpenAIHarmonyRole.DEVELOPER - ): - continue - messages.append(harmony_msg) - return messages - - def render_for_completion(messages: list[Message]) -> list[int]: conversation = Conversation.from_messages(messages) token_ids = get_encoding().render_conversation_for_completion( @@ -531,313 +324,6 @@ def render_for_completion(messages: list[Message]) -> list[int]: return token_ids -def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem: - """Parse browser tool calls (search, open, find) into web search items.""" - if len(message.content) != 1: - raise ValueError("Invalid number of contents in browser message") - content = message.content[0] - - # Parse JSON args (with retry detection) - try: - browser_call = json.loads(content.text) - 
except json.JSONDecodeError: - logger.warning( - "Invalid JSON in browser tool call, using error placeholder: %s", - content.text, - ) - json_retry_output_message = ( - f"Invalid JSON args, caught and retried: {content.text}" - ) - browser_call = { - "query": json_retry_output_message, - "url": json_retry_output_message, - "pattern": json_retry_output_message, - } - - # Create appropriate action based on recipient - if recipient == "browser.search": - action = ActionSearch( - query=f"cursor:{browser_call.get('query', '')}", type="search" - ) - elif recipient == "browser.open": - action = ActionOpenPage( - url=f"cursor:{browser_call.get('url', '')}", type="open_page" - ) - elif recipient == "browser.find": - action = ActionFind( - pattern=browser_call.get("pattern", ""), - url=f"cursor:{browser_call.get('url', '')}", - type="find", - ) - else: - raise ValueError(f"Unknown browser action: {recipient}") - - return ResponseFunctionWebSearch( - id=f"ws_{random_uuid()}", - action=action, - status="completed", - type="web_search_call", - ) - - -def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]: - """Parse function calls into function tool call items.""" - function_name = recipient.split(".")[-1] - output_items = [] - for content in message.content: - random_id = random_uuid() - response_item = ResponseFunctionToolCall( - arguments=content.text, - call_id=f"call_{random_id}", - type="function_call", - name=function_name, - id=f"fc_{random_id}", - ) - output_items.append(response_item) - return output_items - - -def _parse_reasoning(message: Message) -> list[ResponseOutputItem]: - """Parse reasoning/analysis content into reasoning items.""" - output_items = [] - for content in message.content: - reasoning_item = ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent(text=content.text, type="reasoning_text") - ], - status=None, - ) - 
output_items.append(reasoning_item) - return output_items - - -def _parse_final_message(message: Message) -> ResponseOutputItem: - """Parse final channel messages into output message items.""" - contents = [] - for content in message.content: - output_text = ResponseOutputText( - text=content.text, - annotations=[], # TODO - type="output_text", - logprobs=None, # TODO - ) - contents.append(output_text) - return ResponseOutputMessage( - id=f"msg_{random_uuid()}", - content=contents, - role=message.author.role, - status="completed", - type="message", - ) - - -def _parse_mcp_recipient(recipient: str) -> tuple[str, str]: - """ - Parse MCP recipient into (server_label, tool_name). - - For dotted recipients like "repo_browser.list": - - server_label: "repo_browser" (namespace/server) - - tool_name: "list" (specific tool) - - For simple recipients like "filesystem": - - server_label: "filesystem" - - tool_name: "filesystem" - """ - if "." in recipient: - server_label = recipient.split(".")[0] - tool_name = recipient.split(".")[-1] - else: - server_label = recipient - tool_name = recipient - return server_label, tool_name - - -def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]: - """Parse MCP calls into MCP call items.""" - # Handle built-in tools that need server_label mapping - if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL: - server_label = _BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient] - tool_name = recipient - else: - server_label, tool_name = _parse_mcp_recipient(recipient) - - output_items = [] - for content in message.content: - response_item = McpCall( - arguments=content.text, - type="mcp_call", - name=tool_name, - server_label=server_label, - id=f"mcp_{random_uuid()}", - status="completed", - ) - output_items.append(response_item) - return output_items - - -def _parse_message_no_recipient( - message: Message, -) -> list[ResponseOutputItem]: - """Parse a Harmony message with no recipient based on its channel.""" - if 
message.channel == "analysis": - return _parse_reasoning(message) - - if message.channel in ("commentary", "final"): - # Per Harmony format, preambles (commentary with no recipient) and - # final channel content are both intended to be shown to end-users. - # See: https://cookbook.openai.com/articles/openai-harmony - return [_parse_final_message(message)] - - raise ValueError(f"Unknown channel: {message.channel}") - - -def parse_output_message(message: Message) -> list[ResponseOutputItem]: - """ - Parse a Harmony message into a list of output response items. - """ - if message.author.role != "assistant": - # This is a message from a tool to the assistant (e.g., search result). - # Don't include it in the final output for now. This aligns with - # OpenAI's behavior on models like o4-mini. - return [] - - output_items: list[ResponseOutputItem] = [] - recipient = message.recipient - - if recipient is not None: - # Browser tool calls (browser.search, browser.open, browser.find) - if recipient.startswith("browser."): - output_items.append(_parse_browser_tool_call(message, recipient)) - - # Function calls (should only happen on commentary channel) - elif message.channel == "commentary" and recipient.startswith("functions."): - output_items.extend(_parse_function_call(message, recipient)) - - # Built-in MCP tools (python, browser, container) - elif recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL: - output_items.extend(_parse_reasoning(message)) - - # All other recipients are MCP calls - else: - output_items.extend(_parse_mcp_call(message, recipient)) - - # No recipient - handle based on channel for non-tool messages - else: - output_items.extend(_parse_message_no_recipient(message)) - - return output_items - - -def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]: - if not parser.current_content: - return [] - if parser.current_role != Role.ASSISTANT: - return [] - current_recipient = parser.current_recipient - if current_recipient is not None 
and current_recipient.startswith("browser."): - return [] - - if current_recipient and parser.current_channel in ("commentary", "analysis"): - if current_recipient.startswith("functions."): - rid = random_uuid() - return [ - ResponseFunctionToolCall( - arguments=parser.current_content, - call_id=f"call_{rid}", - type="function_call", - name=current_recipient.split(".")[-1], - id=f"fc_{rid}", - status="in_progress", - ) - ] - # Built-in MCP tools (python, browser, container) - elif current_recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL: - return [ - ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent( - text=parser.current_content, type="reasoning_text" - ) - ], - status=None, - ) - ] - # All other recipients are MCP calls - else: - rid = random_uuid() - server_label, tool_name = _parse_mcp_recipient(current_recipient) - return [ - McpCall( - arguments=parser.current_content, - type="mcp_call", - name=tool_name, - server_label=server_label, - id=f"mcp_{rid}", - status="in_progress", - ) - ] - - if parser.current_channel == "commentary": - # Per Harmony format, preambles (commentary with no recipient) are - # intended to be shown to end-users, unlike analysis channel content. 
- output_text = ResponseOutputText( - text=parser.current_content, - annotations=[], - type="output_text", - logprobs=None, - ) - return [ - ResponseOutputMessage( - id=f"msg_{random_uuid()}", - content=[output_text], - role="assistant", - status="incomplete", - type="message", - ) - ] - - if parser.current_channel == "analysis": - return [ - ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent( - text=parser.current_content, type="reasoning_text" - ) - ], - status=None, - ) - ] - - if parser.current_channel == "final": - output_text = ResponseOutputText( - text=parser.current_content, - annotations=[], # TODO - type="output_text", - logprobs=None, # TODO - ) - text_item = ResponseOutputMessage( - id=f"msg_{random_uuid()}", - content=[output_text], - role="assistant", - # if the parser still has messages (ie if the generator got cut - # abruptly), this should be incomplete - status="incomplete", - type="message", - ) - return [text_item] - - return [] - - def get_stop_tokens_for_assistant_actions() -> list[int]: return get_encoding().stop_tokens_for_assistant_actions() diff --git a/vllm/entrypoints/openai/responses/harmony.py b/vllm/entrypoints/openai/responses/harmony.py new file mode 100644 index 000000000..460f31092 --- /dev/null +++ b/vllm/entrypoints/openai/responses/harmony.py @@ -0,0 +1,552 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Harmony ↔ Responses API conversion utilities. + +Handles two directions: + 1. Response Input → Harmony Messages (input parsing) + 2. 
Harmony Messages → Response Output Items (output parsing) +""" + +import json + +from openai.types.responses import ( + ResponseFunctionToolCall, + ResponseOutputItem, + ResponseOutputMessage, + ResponseOutputText, + ResponseReasoningItem, +) +from openai.types.responses.response_function_web_search import ( + ActionFind, + ActionOpenPage, + ActionSearch, + ResponseFunctionWebSearch, +) +from openai.types.responses.response_output_item import McpCall +from openai.types.responses.response_reasoning_item import ( + Content as ResponseReasoningTextContent, +) +from openai_harmony import Author, Message, Role, StreamableParser, TextContent + +from vllm.entrypoints.openai.parser.harmony_utils import ( + BUILTIN_TOOL_TO_MCP_SERVER_LABEL, + flatten_chat_text_content, +) +from vllm.entrypoints.openai.responses.protocol import ( + ResponseInputOutputItem, + ResponsesRequest, +) +from vllm.logger import init_logger +from vllm.utils import random_uuid + +logger = init_logger(__name__) + +# --------------------------------------------------------------------------- +# 1. Private helpers for input parsing +# --------------------------------------------------------------------------- + + +def _parse_harmony_format_message(chat_msg: dict) -> Message: + """Reconstruct a Message from Harmony-format dict, + preserving channel, recipient, and content_type.""" + author_dict = chat_msg["author"] + role = author_dict.get("role") + name = author_dict.get("name") + + raw_content = chat_msg.get("content", "") + if isinstance(raw_content, list): + # TODO: Support refusal and non-text content types. 
+ contents = [TextContent(text=c.get("text", "")) for c in raw_content] + elif isinstance(raw_content, str): + contents = [TextContent(text=raw_content)] + else: + contents = [TextContent(text="")] + + if name: + msg = Message.from_author_and_contents(Author.new(Role(role), name), contents) + else: + msg = Message.from_role_and_contents(Role(role), contents) + + channel = chat_msg.get("channel") + if channel: + msg = msg.with_channel(channel) + recipient = chat_msg.get("recipient") + if recipient: + msg = msg.with_recipient(recipient) + content_type = chat_msg.get("content_type") + if content_type: + msg = msg.with_content_type(content_type) + + return msg + + +def _parse_chat_format_message(chat_msg: dict) -> list[Message]: + """Parse an OpenAI chat-format dict into Harmony messages.""" + role = chat_msg.get("role") + if role is None: + raise ValueError(f"Message has no 'role' key: {chat_msg}") + + # Assistant message with tool calls + tool_calls = chat_msg.get("tool_calls") + if role == "assistant" and tool_calls: + msgs: list[Message] = [] + for call in tool_calls: + func = call.get("function", {}) + name = func.get("name", "") + arguments = func.get("arguments", "") or "" + msg = Message.from_role_and_content(Role.ASSISTANT, arguments) + msg = msg.with_channel("commentary") + msg = msg.with_recipient(f"functions.{name}") + msg = msg.with_content_type("json") + msgs.append(msg) + return msgs + + # Tool role message (tool output) + if role == "tool": + name = chat_msg.get("name", "") + if name and not name.startswith("functions."): + name = f"functions.{name}" + content = chat_msg.get("content", "") or "" + content = flatten_chat_text_content(content) + # NOTE: .with_recipient("assistant") is required on tool messages + # to match parse_chat_input_to_harmony_message behavior and ensure + # proper routing in the Harmony protocol. 
+ msg = ( + Message.from_author_and_content(Author.new(Role.TOOL, name), content) + .with_channel("commentary") + .with_recipient("assistant") + ) + return [msg] + + # Default: user/assistant/system messages + content = chat_msg.get("content", "") + if isinstance(content, str): + contents = [TextContent(text=content)] + else: + # TODO: Support refusal. + contents = [TextContent(text=c.get("text", "")) for c in content] + msg = Message.from_role_and_contents(role, contents) + return [msg] + + +# --------------------------------------------------------------------------- +# 2. Public input parsing functions +# --------------------------------------------------------------------------- + + +def response_input_to_harmony( + response_msg: ResponseInputOutputItem, + prev_responses: list[ResponseOutputItem | ResponseReasoningItem], +) -> Message: + """Convert a single ResponseInputOutputItem into a Harmony Message.""" + if not isinstance(response_msg, dict): + response_msg = response_msg.model_dump() + if "type" not in response_msg or response_msg["type"] == "message": + role = response_msg["role"] + content = response_msg["content"] + # Add prefix for developer messages. 
+ # <|start|>developer<|message|># Instructions {instructions}<|end|> + text_prefix = "Instructions:\n" if role == "developer" else "" + if isinstance(content, str): + msg = Message.from_role_and_content(role, text_prefix + content) + else: + contents = [TextContent(text=text_prefix + c["text"]) for c in content] + msg = Message.from_role_and_contents(role, contents) + if role == "assistant": + msg = msg.with_channel("final") + elif response_msg["type"] == "function_call_output": + call_id = response_msg["call_id"] + call_response: ResponseFunctionToolCall | None = None + for prev_response in reversed(prev_responses): + if ( + isinstance(prev_response, ResponseFunctionToolCall) + and prev_response.call_id == call_id + ): + call_response = prev_response + break + if call_response is None: + raise ValueError(f"No call message found for {call_id}") + msg = Message.from_author_and_content( + Author.new(Role.TOOL, f"functions.{call_response.name}"), + response_msg["output"], + ) + elif response_msg["type"] == "reasoning": + content = response_msg["content"] + assert len(content) == 1 + msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"]) + elif response_msg["type"] == "function_call": + msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"]) + msg = msg.with_channel("commentary") + msg = msg.with_recipient(f"functions.{response_msg['name']}") + msg = msg.with_content_type("json") + else: + raise ValueError(f"Unknown input type: {response_msg['type']}") + return msg + + +def response_previous_input_to_harmony(chat_msg) -> list[Message]: + """Parse a message from request.previous_input_messages + into Harmony messages. + + Supports both OpenAI chat format ({"role": "..."}) and + Harmony format ({"author": {"role": "..."}}). 
+ """ + if not isinstance(chat_msg, dict): + chat_msg = chat_msg.model_dump(exclude_none=True) + + if "author" in chat_msg and isinstance(chat_msg.get("author"), dict): + return [_parse_harmony_format_message(chat_msg)] + + return _parse_chat_format_message(chat_msg) + + +def construct_harmony_previous_input_messages( + request: ResponsesRequest, +) -> list[Message]: + """Build a Harmony message list from request.previous_input_messages. + + Filters out system/developer messages to match OpenAI behavior where + instructions are always taken from the most recent Responses API request. + """ + messages: list[Message] = [] + if request.previous_input_messages: + for message in request.previous_input_messages: + # Handle both Message objects and dictionary inputs + if isinstance(message, Message): + message_role = message.author.role + if message_role == Role.SYSTEM or message_role == Role.DEVELOPER: + continue + messages.append(message) + else: + harmony_messages = response_previous_input_to_harmony(message) + for harmony_msg in harmony_messages: + message_role = harmony_msg.author.role + if message_role == Role.SYSTEM or message_role == Role.DEVELOPER: + continue + messages.append(harmony_msg) + return messages + + +# --------------------------------------------------------------------------- +# 3. 
Private helpers for output parsing +# --------------------------------------------------------------------------- + + +def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem: + """Parse browser tool calls (search, open, find) into web search items.""" + if len(message.content) != 1: + raise ValueError("Invalid number of contents in browser message") + content = message.content[0] + + # Parse JSON args (with retry detection) + try: + browser_call = json.loads(content.text) + except json.JSONDecodeError: + logger.warning( + "Invalid JSON in browser tool call, using error placeholder: %s", + content.text, + ) + json_retry_output_message = ( + f"Invalid JSON args, caught and retried: {content.text}" + ) + browser_call = { + "query": json_retry_output_message, + "url": json_retry_output_message, + "pattern": json_retry_output_message, + } + + # Create appropriate action based on recipient + if recipient == "browser.search": + action = ActionSearch( + query=f"cursor:{browser_call.get('query', '')}", type="search" + ) + elif recipient == "browser.open": + action = ActionOpenPage( + url=f"cursor:{browser_call.get('url', '')}", type="open_page" + ) + elif recipient == "browser.find": + action = ActionFind( + pattern=browser_call.get("pattern", ""), + url=f"cursor:{browser_call.get('url', '')}", + type="find", + ) + else: + raise ValueError(f"Unknown browser action: {recipient}") + + return ResponseFunctionWebSearch( + id=f"ws_{random_uuid()}", + action=action, + status="completed", + type="web_search_call", + ) + + +def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]: + """Parse function calls into function tool call items.""" + function_name = recipient.split(".")[-1] + output_items = [] + for content in message.content: + random_id = random_uuid() + response_item = ResponseFunctionToolCall( + arguments=content.text, + call_id=f"call_{random_id}", + type="function_call", + name=function_name, + 
def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
    """Turn every content part of a message into its own reasoning item.

    Each item gets a fresh "rs_"-prefixed id, an empty summary, no status,
    and a single reasoning-text entry carrying the part's text.
    """
    return [
        ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[
                ResponseReasoningTextContent(text=part.text, type="reasoning_text")
            ],
            status=None,
        )
        for part in message.content
    ]


def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Wrap all content parts of a message in one completed output message.

    Every part becomes an output-text entry; the resulting message copies its
    role from ``message.author.role`` and gets a fresh "msg_"-prefixed id.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
    """Split an MCP recipient into (server_label, tool_name).

    The server label is the text before the first "." and the tool name is
    the text after the last ".":
    - "repo_browser.list" -> ("repo_browser", "list")

    A recipient without any "." is used for both values:
    - "filesystem" -> ("filesystem", "filesystem")
    """
    # partition/rpartition return the whole string in the slot read here when
    # no "." is present, so the dotted and undotted cases need no branching.
    head = recipient.partition(".")[0]
    tail = recipient.rpartition(".")[2]
    return head, tail


def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed MCP call item per content part of the message.

    Recipients listed in BUILTIN_TOOL_TO_MCP_SERVER_LABEL take their server
    label from that mapping and keep the full recipient as the tool name;
    every other recipient is split by ``_parse_mcp_recipient``.
    """
    is_builtin = recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL
    server_label, tool_name = (
        (BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient], recipient)
        if is_builtin
        else _parse_mcp_recipient(recipient)
    )

    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]


def _parse_message_no_recipient(
    message: Message,
) -> list[ResponseOutputItem]:
    """Convert a recipient-less Harmony message according to its channel.

    Raises:
        ValueError: If the channel is not "analysis", "commentary" or "final".
    """
    channel = message.channel

    # Per Harmony format, preambles (commentary with no recipient) and
    # final channel content are both intended to be shown to end-users.
    # See: https://cookbook.openai.com/articles/openai-harmony
    user_visible_channels = ("commentary", "final")

    if channel == "analysis":
        return _parse_reasoning(message)
    if channel not in user_visible_channels:
        raise ValueError(f"Unknown channel: {message.channel}")
    return [_parse_final_message(message)]
def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
    """Convert one Harmony message into Responses API output items.

    Dispatch order:
    1. Author is not the assistant: nothing is emitted.
    2. No recipient: routed by channel via ``_parse_message_no_recipient``.
    3. Recipient starts with "browser.": a single web search item.
    4. Commentary channel and recipient starts with "functions.": function
       tool call items.
    5. Recipient is a key of BUILTIN_TOOL_TO_MCP_SERVER_LABEL: reasoning items.
    6. Any other recipient: MCP call items.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search result).
        # Don't include it in the final output for now. This aligns with
        # OpenAI's behavior on models like o4-mini.
        return []

    recipient = message.recipient

    # No recipient - handle based on channel for non-tool messages
    if recipient is None:
        return _parse_message_no_recipient(message)

    # Browser tool calls (browser.search, browser.open, browser.find)
    if recipient.startswith("browser."):
        return [_parse_browser_tool_call(message, recipient)]

    # Function calls (should only happen on commentary channel)
    if message.channel == "commentary" and recipient.startswith("functions."):
        return _parse_function_call(message, recipient)

    # Built-in MCP tools (python, browser, container)
    if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        return _parse_reasoning(message)

    # All other recipients are MCP calls
    return _parse_mcp_call(message, recipient)
def parser_state_to_response_output(
    parser: StreamableParser,
) -> list[ResponseOutputItem]:
    """Build response items from content still buffered in the parser.

    Used when the parser holds content that has not formed a complete
    message yet (e.g., generation was cut short). Returns an empty list when
    there is no buffered content, when the current role is not the assistant,
    when the recipient is a "browser." tool, or when the channel is not one
    of "commentary", "analysis" or "final". Otherwise returns exactly one
    item: an in-progress function or MCP call, a reasoning item, or an
    incomplete assistant message.
    """
    text = parser.current_content
    if not text:
        return []
    if parser.current_role != Role.ASSISTANT:
        return []

    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        return []

    channel = parser.current_channel

    def reasoning_item():
        # Shared by built-in tool calls and plain analysis content.
        return ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[ResponseReasoningTextContent(text=text, type="reasoning_text")],
            status=None,
        )

    if recipient and channel in ("commentary", "analysis"):
        if recipient.startswith("functions."):
            # One uuid so that call_id and id share the same suffix.
            rid = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=text,
                    call_id=f"call_{rid}",
                    type="function_call",
                    name=recipient.split(".")[-1],
                    id=f"fc_{rid}",
                    status="in_progress",
                )
            ]

        # Built-in MCP tools (python, browser, container)
        if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
            return [reasoning_item()]

        # All other recipients are MCP calls
        server_label, tool_name = _parse_mcp_recipient(recipient)
        return [
            McpCall(
                arguments=text,
                type="mcp_call",
                name=tool_name,
                server_label=server_label,
                id=f"mcp_{random_uuid()}",
                status="in_progress",
            )
        ]

    if channel == "analysis":
        return [reasoning_item()]

    if channel in ("commentary", "final"):
        # Per Harmony format, preambles (commentary with no recipient) are
        # intended to be shown to end-users, unlike analysis channel content.
        # The parser still holds this text (i.e. the generator got cut
        # abruptly), so the message is reported as incomplete.
        return [
            ResponseOutputMessage(
                id=f"msg_{random_uuid()}",
                content=[
                    ResponseOutputText(
                        text=text,
                        annotations=[],  # TODO
                        type="output_text",
                        logprobs=None,  # TODO
                    )
                ],
                role="assistant",
                status="incomplete",
                type="message",
            )
        ]

    return []
construct_harmony_previous_input_messages, + harmony_to_response_output, + parser_state_to_response_output, + response_input_to_harmony, +) from vllm.entrypoints.openai.responses.protocol import ( InputTokensDetails, OutputTokensDetails, @@ -954,9 +956,9 @@ class OpenAIServingResponses(OpenAIServing): output_items: list[ResponseOutputItem] = [] num_init_messages = context.num_init_messages for msg in context.messages[num_init_messages:]: - output_items.extend(parse_output_message(msg)) + output_items.extend(harmony_to_response_output(msg)) # Handle the generation stopped in the middle (if any). - last_items = parse_remaining_state(context.parser) + last_items = parser_state_to_response_output(context.parser) if last_items: output_items.extend(last_items) return output_items @@ -1103,13 +1105,13 @@ class OpenAIServingResponses(OpenAIServing): else: prev_outputs = [] for response_msg in request.input: - new_msg = parse_response_input(response_msg, prev_outputs) + new_msg = response_input_to_harmony(response_msg, prev_outputs) if new_msg.author.role != "system": messages.append(new_msg) # User passes in a tool call request and its output. We need - # to add the tool call request to prev_outputs so that the - # parse_response_input can find the tool call request when + # to add the tool call request to prev_outputs so that + # response_input_to_harmony can find the tool call request when # parsing the tool call output. if isinstance(response_msg, ResponseFunctionToolCall): prev_outputs.append(response_msg)