[Frontend] [gpt-oss] Mcp type bug (#27689)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Signed-off-by: Alec Solder <alecs@fb.com>
Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
Co-authored-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Co-authored-by: Alec Solder <alecs@fb.com>
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com>
2025-10-29 06:01:32 -04:00
parent 3c7fefdeba
commit ab2eb27b74
7 changed files with 293 additions and 44 deletions
--- a/tests/entrypoints/openai/test_response_api_mcp_tools.py
+++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py
@@ -26,6 +26,8 @@ def mcp_disabled_server(monkeypatch_module: pytest.MonkeyPatch):
    with monkeypatch_module.context() as m:
        m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
        m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv")
+        # Helps the model follow instructions better
+        m.setenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "1")
        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
            yield remote_server

@@ -37,7 +39,9 @@ def mcp_enabled_server(monkeypatch_module: pytest.MonkeyPatch):
    with monkeypatch_module.context() as m:
        m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
        m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv")
-        m.setenv("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", "code_interpreter,container")
+        m.setenv("VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS", "code_interpreter,container")
+        # Helps the model follow instructions better
+        m.setenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "1")
        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
            yield remote_server

@@ -56,18 +60,15 @@ async def mcp_enabled_client(mcp_enabled_server):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.")
 async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, model_name: str):
    response = await mcp_enabled_client.responses.create(
        model=model_name,
-        # TODO: Ideally should be able to set max tool calls
-        # to prevent multi-turn, but it is not currently supported
-        # would speed up the test
        input=(
-            "What's the first 4 digits after the decimal point of "
-            "cube root of `19910212 * 20250910`? "
-            "Show only the digits. The python interpreter is not stateful "
-            "and you must print to see the output."
+            "Execute the following code: "
+            "import random; print(random.randint(1, 1000000))"
+        ),
+        instructions=(
+            "You must use the Python tool to execute code. Never simulate execution."
        ),
        tools=[
            {
@@ -77,26 +78,47 @@ async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, model_name:
                "server_url": "http://localhost:8888",
            }
        ],
+        extra_body={"enable_response_messages": True},
    )
    assert response is not None
    assert response.status == "completed"
-    assert response.usage.output_tokens_details.tool_output_tokens > 0
+    # Verify output messages: Tool calls and responses on analysis channel
+    tool_call_found = False
+    tool_response_found = False
+    for message in response.output_messages:
+        recipient = message.get("recipient")
+        if recipient and recipient.startswith("python"):
+            tool_call_found = True
+            assert message.get("channel") == "analysis", (
+                "Tool call should be on analysis channel"
+            )
+        author = message.get("author", {})
+        if (
+            author.get("role") == "tool"
+            and author.get("name")
+            and author.get("name").startswith("python")
+        ):
+            tool_response_found = True
+            assert message.get("channel") == "analysis", (
+                "Tool response should be on analysis channel"
+            )
+
+    assert tool_call_found, "Should have found at least one Python tool call"
+    assert tool_response_found, "Should have found at least one Python tool response"
+    for message in response.input_messages:
+        assert message.get("author").get("role") != "developer", (
+            "No developer messages should be present with valid mcp tool"
+        )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.")
 async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_name: str):
    response = await mcp_disabled_client.responses.create(
        model=model_name,
-        # TODO: Ideally should be able to set max tool calls
-        # to prevent multi-turn, but it is not currently supported
-        # would speed up the test
        input=(
-            "What's the first 4 digits after the decimal point of "
-            "cube root of `19910212 * 20250910`? "
-            "Show only the digits. The python interpreter is not stateful "
-            "and you must print to see the output."
+            "Execute the following code if the tool is present: "
+            "import random; print(random.randint(1, 1000000))"
        ),
        tools=[
            {
@@ -106,7 +128,34 @@ async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_nam
                "server_url": "http://localhost:8888",
            }
        ],
+        extra_body={"enable_response_messages": True},
    )
    assert response is not None
    assert response.status == "completed"
-    assert response.usage.output_tokens_details.tool_output_tokens == 0
+    # Verify output messages: there must be no tool calls or tool responses
+    tool_call_found = False
+    tool_response_found = False
+    for message in response.output_messages:
+        recipient = message.get("recipient")
+        if recipient and recipient.startswith("python"):
+            tool_call_found = True
+            assert message.get("channel") == "analysis", (
+                "Tool call should be on analysis channel"
+            )
+        author = message.get("author", {})
+        if (
+            author.get("role") == "tool"
+            and author.get("name")
+            and author.get("name").startswith("python")
+        ):
+            tool_response_found = True
+            assert message.get("channel") == "analysis", (
+                "Tool response should be on analysis channel"
+            )
+
+    assert not tool_call_found, "Should not have a python call"
+    assert not tool_response_found, "Should not have a tool response"
+    for message in response.input_messages:
+        assert message.get("author").get("role") != "developer", (
+            "No developer messages should be present without a valid tool"
+        )