[Refactor] Consolidate GPT-OSS reasoning parser tests (#36915)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
Signed-off-by: Flora Feng <4florafeng@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-03-16 15:53:07 -04:00
parent 714c6e0eab
commit dfa8852db2
4 changed files with 145 additions and 459 deletions
--- a/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py
+++ b/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py
@@ -1,279 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-"""Integration tests for GPT-OSS structural tags functionality (PR #25515)."""
-
-import json
-from unittest.mock import Mock
-
-import pytest
-
-from vllm.entrypoints.mcp.tool_server import ToolServer
-from vllm.reasoning.gptoss_reasoning_parser import (
-    GptOssReasoningParser,
-)
-from vllm.sampling_params import StructuredOutputsParams
-
-
-class TestGptOssStructuralTagsIntegration:
-    """Integration tests for structural tags in GPT-OSS tool calls."""
-
-    @pytest.fixture
-    def mock_tokenizer(self):
-        """Create a mock tokenizer."""
-        tokenizer = Mock()
-        tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
-        tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
-        return tokenizer
-
-    @pytest.fixture
-    def gptoss_parser(self, mock_tokenizer):
-        """Create a real GptOssReasoningParser instance."""
-        return GptOssReasoningParser(mock_tokenizer)
-
-    @pytest.fixture
-    def tool_server_with_python(self):
-        """Create a tool server with Python tool enabled."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=lambda tool: tool == "python")
-        return tool_server
-
-    @pytest.fixture
-    def tool_server_empty(self):
-        """Create a tool server with no tools."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(return_value=False)
-        return tool_server
-
-    def test_end_to_end_no_tools(self, gptoss_parser):
-        """Test end-to-end flow when no tools are available."""
-        # Test the parser directly
-        result = gptoss_parser.prepare_structured_tag(None, None)
-        parsed_result = json.loads(result)
-
-        # Verify basic structure
-        assert parsed_result["type"] == "structural_tag"
-        assert parsed_result["format"]["type"] == "triggered_tags"
-        assert len(parsed_result["format"]["tags"]) == 1
-
-        # Verify only analysis channel is allowed
-        analysis_tag = parsed_result["format"]["tags"][0]
-        assert analysis_tag["begin"] == "<|channel|>analysis<|message|>"
-        assert analysis_tag["content"]["type"] == "any_text"
-        assert analysis_tag["end"] == "<|end|>"
-
-        # Verify triggers
-        assert parsed_result["format"]["triggers"] == ["<|channel|>analysis"]
-        assert parsed_result["format"]["stop_after_first"] is False
-
-    def test_end_to_end_with_python_tool(self, gptoss_parser, tool_server_with_python):
-        """Test end-to-end flow with Python tool enabled."""
-        result = gptoss_parser.prepare_structured_tag(None, tool_server_with_python)
-        parsed_result = json.loads(result)
-
-        # Should have analysis tag + 2 python tags
-        assert len(parsed_result["format"]["tags"]) == 3
-
-        # Verify all expected tags are present
-        tag_begins = [tag["begin"] for tag in parsed_result["format"]["tags"]]
-        expected_begins = [
-            "<|channel|>analysis<|message|>",
-            "<|channel|>commentary to=python",
-            "<|channel|>analysis to=python",
-        ]
-
-        for expected in expected_begins:
-            assert expected in tag_begins
-
-        # Verify triggers include commentary
-        assert "<|channel|>analysis" in parsed_result["format"]["triggers"]
-        assert "<|channel|>commentary to=" in parsed_result["format"]["triggers"]
-
-    def test_structured_outputs_params_integration(
-        self, gptoss_parser, tool_server_with_python
-    ):
-        """Test integration with StructuredOutputsParams."""
-        # Generate structural tag
-        structural_tag = gptoss_parser.prepare_structured_tag(
-            None, tool_server_with_python
-        )
-
-        # Create StructuredOutputsParams
-        params = StructuredOutputsParams(structural_tag=structural_tag)
-
-        # Verify the tag is properly stored and accessible
-        assert params.structural_tag == structural_tag
-
-        # Verify the tag is valid JSON
-        parsed_tag = json.loads(params.structural_tag)
-        assert parsed_tag["type"] == "structural_tag"
-
-    @pytest.mark.parametrize(
-        "browser, python, container, expected_tags",
-        [
-            # No tools
-            (False, False, False, 1),
-            # Single tool
-            (True, False, False, 3),
-            # Multiple tools
-            (True, True, False, 5),
-            # All tools
-            (True, True, True, 7),
-        ],
-    )
-    def test_tool_server_interaction_flow(
-        self, gptoss_parser, browser, python, container, expected_tags
-    ):
-        """Test the complete tool server interaction flow."""
-
-        # Create a mock ToolServer
-        tool_server = Mock(spec=ToolServer)
-
-        # Simulate tool availability based on parameters
-        tool_server.has_tool = Mock(
-            side_effect=lambda tool: {
-                "browser": browser,
-                "python": python,
-                "container": container,
-            }.get(tool, False)
-        )
-
-        # Run the parser and verify results
-        result = gptoss_parser.prepare_structured_tag(None, tool_server)
-        parsed_result = json.loads(result)
-
-        # Validate number of tags
-        assert len(parsed_result["format"]["tags"]) == expected_tags
-
-        # Verify tool-specific tags exist for enabled tools
-        tag_begins = [tag["begin"] for tag in parsed_result["format"]["tags"]]
-        for tool, enabled in {
-            "browser": browser,
-            "python": python,
-            "container": container,
-        }.items():
-            if enabled:
-                assert f"<|channel|>commentary to={tool}" in tag_begins
-                assert f"<|channel|>analysis to={tool}" in tag_begins
-
-    def test_original_tag_preservation(self, gptoss_parser, tool_server_with_python):
-        """Test that original tags are preserved when provided."""
-        original_tag = '{"type": "custom_tag", "data": "preserved"}'
-
-        result = gptoss_parser.prepare_structured_tag(
-            original_tag, tool_server_with_python
-        )
-
-        # Should return original tag unchanged
-        assert result == original_tag
-
-    @pytest.mark.parametrize(
-        "tools",
-        [
-            [],
-            ["browser"],
-            ["python"],
-            ["container"],
-            ["browser", "python"],
-            ["browser", "container"],
-            ["python", "container"],
-            ["browser", "python", "container"],
-        ],
-    )
-    def test_json_validity_comprehensive(self, gptoss_parser, tools):
-        """Test JSON validity across all possible tool combinations."""
-
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=lambda tool: tool in tools)
-
-        result = gptoss_parser.prepare_structured_tag(None, tool_server)
-
-        # Should be valid JSON
-        parsed_result = json.loads(result)
-
-        # Should have correct structure
-        assert parsed_result["type"] == "structural_tag"
-        assert "format" in parsed_result
-        assert "tags" in parsed_result["format"]
-        assert "triggers" in parsed_result["format"]
-
-        # Tag count should be: 1 (analysis) + 2 * len(tools)
-        expected_tag_count = 1 + (2 * len(tools))
-        assert len(parsed_result["format"]["tags"]) == expected_tag_count
-
-    def test_error_handling_invalid_tool_server(self, gptoss_parser):
-        """Test error handling with invalid tool server."""
-        # Tool server that raises exceptions
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=Exception("Tool server error"))
-
-        # Should handle gracefully and still return a valid tag
-        with pytest.raises(Exception, match="Tool server error"):
-            gptoss_parser.prepare_structured_tag(None, tool_server)
-
-    def test_concurrent_requests_isolation(self, gptoss_parser):
-        """Test that concurrent requests don't interfere with each other."""
-        # Simulate concurrent requests with different tool servers
-        tool_server_1 = Mock(spec=ToolServer)
-        tool_server_1.has_tool = Mock(side_effect=lambda tool: tool == "python")
-
-        tool_server_2 = Mock(spec=ToolServer)
-        tool_server_2.has_tool = Mock(side_effect=lambda tool: tool == "browser")
-
-        # Generate tags concurrently
-        result_1 = gptoss_parser.prepare_structured_tag(None, tool_server_1)
-        result_2 = gptoss_parser.prepare_structured_tag(None, tool_server_2)
-
-        # Parse results
-        parsed_1 = json.loads(result_1)
-        parsed_2 = json.loads(result_2)
-
-        # Verify they have different tool configurations
-        tags_1 = [tag["begin"] for tag in parsed_1["format"]["tags"]]
-        tags_2 = [tag["begin"] for tag in parsed_2["format"]["tags"]]
-
-        # Result 1 should have python tags
-        assert "<|channel|>commentary to=python" in tags_1
-        assert "<|channel|>commentary to=browser" not in tags_1
-
-        # Result 2 should have browser tags
-        assert "<|channel|>commentary to=browser" in tags_2
-        assert "<|channel|>commentary to=python" not in tags_2
-
-    def test_tag_format_consistency(self, gptoss_parser):
-        """Test that all generated tags follow consistent format."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(
-            side_effect=lambda tool: tool in ["python", "browser"]
-        )
-
-        result = gptoss_parser.prepare_structured_tag(None, tool_server)
-        parsed_result = json.loads(result)
-
-        # Verify all tags have required fields
-        for tag in parsed_result["format"]["tags"]:
-            assert "begin" in tag
-            assert "content" in tag
-            assert "end" in tag
-            assert tag["content"]["type"] == "any_text"
-            assert tag["end"] == "<|end|>"
-
-            # Verify begin format
-            assert tag["begin"].startswith("<|channel|>")
-
-    def test_trigger_configuration(self, gptoss_parser):
-        """Test trigger configuration for different tool setups."""
-        # Test with no tools
-        result_no_tools = gptoss_parser.prepare_structured_tag(None, None)
-        parsed_no_tools = json.loads(result_no_tools)
-        assert parsed_no_tools["format"]["triggers"] == ["<|channel|>analysis"]
-
-        # Test with tools
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=lambda tool: tool == "python")
-
-        result_with_tools = gptoss_parser.prepare_structured_tag(None, tool_server)
-        parsed_with_tools = json.loads(result_with_tools)
-
-        expected_triggers = ["<|channel|>analysis", "<|channel|>commentary to="]
-        assert set(parsed_with_tools["format"]["triggers"]) == set(expected_triggers)
--- a/tests/reasoning/test_gptoss_reasoning_parser.py
+++ b/tests/reasoning/test_gptoss_reasoning_parser.py
@@ -1,11 +1,19 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import json
+from unittest.mock import Mock
+
 import pytest
 from transformers import AutoTokenizer

+from vllm.entrypoints.mcp.tool_server import ToolServer
 from vllm.reasoning import ReasoningParser
-from vllm.reasoning.gptoss_reasoning_parser import GptOssReasoningParser
+from vllm.reasoning.gptoss_reasoning_parser import (
+    GptOssReasoningParser,
+    from_builtin_tool_to_tag,
+    no_func_reasoning_tag,
+)

 REASONING_MODEL_NAME = "openai/gpt-oss-120b"

@@ -142,3 +150,133 @@ def test_gptoss_is_reasoning_end(
    output_ids = gpt_oss_tokenizer.convert_tokens_to_ids(output)
    actual_is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == actual_is_reasoning_end
+
+
+class TestGptOssStructuralTags:
+    """Test cases for GptOssReasoningParser structural tag functionality."""
+
+    @pytest.fixture
+    def mock_tokenizer(self):
+        """Create a mock tokenizer for testing."""
+        tokenizer = Mock()
+        tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
+        tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
+        return tokenizer
+
+    @pytest.fixture
+    def reasoning_parser(self, mock_tokenizer):
+        """Create a GptOssReasoningParser instance."""
+        return GptOssReasoningParser(mock_tokenizer)
+
+    def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
+        """Test prepare_structured_tag with no tool server."""
+        result = reasoning_parser.prepare_structured_tag(None, None)
+        expected = json.dumps(no_func_reasoning_tag)
+
+        assert result == expected
+
+        # Verify the structure is correct
+        parsed = json.loads(result)
+        assert parsed["type"] == "structural_tag"
+        assert parsed["format"]["type"] == "triggered_tags"
+        assert len(parsed["format"]["tags"]) == 1
+        assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
+        assert parsed["format"]["triggers"] == ["<|channel|>analysis"]
+
+    def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
+        """Test prepare_structured_tag when original_tag is provided."""
+        original_tag = '{"custom": "tag"}'
+        result = reasoning_parser.prepare_structured_tag(original_tag, None)
+
+        # Should return the original tag unchanged
+        assert result == original_tag
+
+    def test_from_builtin_tool_to_tag(self):
+        """Test from_builtin_tool_to_tag function."""
+        tags = from_builtin_tool_to_tag("python")
+
+        assert len(tags) == 2
+        assert tags[0]["begin"] == "<|channel|>commentary to=python"
+        assert tags[0]["content"]["type"] == "any_text"
+        assert tags[0]["end"] == "<|end|>"
+
+        assert tags[1]["begin"] == "<|channel|>analysis to=python"
+        assert tags[1]["content"]["type"] == "any_text"
+        assert tags[1]["end"] == "<|end|>"
+
+    @pytest.mark.parametrize(
+        "tools",
+        [
+            [],
+            ["browser"],
+            ["python"],
+            ["container"],
+            ["browser", "python"],
+            ["browser", "container"],
+            ["python", "container"],
+            ["browser", "python", "container"],
+        ],
+    )
+    def test_json_validity_comprehensive(self, reasoning_parser, tools):
+        """Test JSON validity across all possible tool combinations."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(side_effect=lambda tool: tool in tools)
+
+        result = reasoning_parser.prepare_structured_tag(None, tool_server)
+        parsed_result = json.loads(result)
+
+        assert parsed_result["type"] == "structural_tag"
+        assert "format" in parsed_result
+        assert "tags" in parsed_result["format"]
+        assert "triggers" in parsed_result["format"]
+
+        # Tag count should be: 1 (analysis) + 2 * len(tools)
+        expected_tag_count = 1 + (2 * len(tools))
+        assert len(parsed_result["format"]["tags"]) == expected_tag_count
+
+        # Verify triggers are correctly configured
+        expected_triggers = ["<|channel|>analysis"]
+        if tools:
+            expected_triggers.append("<|channel|>commentary to=")
+        assert set(parsed_result["format"]["triggers"]) == set(expected_triggers)
+
+    def test_no_cross_request_state_pollution(self, reasoning_parser):
+        """Test that sequential calls with different tool servers produce
+        independent results, guarding against shared mutable state
+        (e.g. missing deepcopy in tag_with_builtin_funcs)."""
+        tool_server_1 = Mock(spec=ToolServer)
+        tool_server_1.has_tool = Mock(side_effect=lambda tool: tool == "python")
+
+        tool_server_2 = Mock(spec=ToolServer)
+        tool_server_2.has_tool = Mock(side_effect=lambda tool: tool == "browser")
+
+        result_1 = reasoning_parser.prepare_structured_tag(None, tool_server_1)
+        result_2 = reasoning_parser.prepare_structured_tag(None, tool_server_2)
+
+        tags_1 = [tag["begin"] for tag in json.loads(result_1)["format"]["tags"]]
+        tags_2 = [tag["begin"] for tag in json.loads(result_2)["format"]["tags"]]
+
+        assert "<|channel|>commentary to=python" in tags_1
+        assert "<|channel|>commentary to=browser" not in tags_1
+
+        assert "<|channel|>commentary to=browser" in tags_2
+        assert "<|channel|>commentary to=python" not in tags_2
+
+    def test_tag_format_consistency(self, reasoning_parser):
+        """Test that all generated tags follow consistent format,
+        catching malformed tags from from_builtin_tool_to_tag."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(
+            side_effect=lambda tool: tool in ["python", "browser"]
+        )
+
+        result = reasoning_parser.prepare_structured_tag(None, tool_server)
+        parsed_result = json.loads(result)
+
+        for tag in parsed_result["format"]["tags"]:
+            assert "begin" in tag
+            assert "content" in tag
+            assert "end" in tag
+            assert tag["content"]["type"] == "any_text"
+            assert tag["end"] == "<|end|>"
+            assert tag["begin"].startswith("<|channel|>")
--- a/tests/v1/structured_output/test_gptoss_structural_tags.py
+++ b/tests/v1/structured_output/test_gptoss_structural_tags.py
@@ -1,173 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-"""Unit tests for GPT-OSS structural tag support in reasoning (PR #25515)."""
-
-import json
-from unittest.mock import Mock
-
-import pytest
-
-from vllm.entrypoints.mcp.tool_server import ToolServer
-from vllm.reasoning.gptoss_reasoning_parser import (
-    GptOssReasoningParser,
-    from_builtin_tool_to_tag,
-    no_func_reaonsing_tag,
-    tag_with_builtin_funcs,
-)
-
-
-class TestGptOssReasoningParser:
-    """Test cases for GptOssReasoningParser structural tag functionality."""
-
-    @pytest.fixture
-    def mock_tokenizer(self):
-        """Create a mock tokenizer for testing."""
-        tokenizer = Mock()
-        tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
-        tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
-        return tokenizer
-
-    @pytest.fixture
-    def reasoning_parser(self, mock_tokenizer):
-        """Create a GptOssReasoningParser instance."""
-        return GptOssReasoningParser(mock_tokenizer)
-
-    @pytest.fixture
-    def mock_tool_server_empty(self):
-        """Create a mock ToolServer with no tools."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(return_value=False)
-        return tool_server
-
-    @pytest.fixture
-    def mock_tool_server_with_browser(self):
-        """Create a mock ToolServer with browser tool."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=lambda tool: tool == "browser")
-        return tool_server
-
-    @pytest.fixture
-    def mock_tool_server_with_all_tools(self):
-        """Create a mock ToolServer with all builtin tools."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(
-            side_effect=lambda tool: tool in ["browser", "python", "container"]
-        )
-        return tool_server
-
-    def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
-        """Test prepare_structured_tag with no tool server."""
-        result = reasoning_parser.prepare_structured_tag(None, None)
-        expected = json.dumps(no_func_reaonsing_tag)
-
-        assert result == expected
-
-        # Verify the structure is correct
-        parsed = json.loads(result)
-        assert parsed["type"] == "structural_tag"
-        assert parsed["format"]["type"] == "triggered_tags"
-        assert len(parsed["format"]["tags"]) == 1
-        assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
-        assert parsed["format"]["triggers"] == ["<|channel|>analysis"]
-
-    def test_prepare_structured_tag_with_all_tools(
-        self, reasoning_parser, mock_tool_server_with_all_tools
-    ):
-        """Test prepare_structured_tag with all builtin tools."""
-        result = reasoning_parser.prepare_structured_tag(
-            None, mock_tool_server_with_all_tools
-        )
-        parsed = json.loads(result)
-
-        # Should have analysis tag + tags for all 3 tools (2 tags each)
-        assert len(parsed["format"]["tags"]) == 7  # 1 analysis + 6 tool tags
-
-        # Check all tool tags are present
-        tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
-        for tool in ["browser", "python", "container"]:
-            assert f"<|channel|>commentary to={tool}" in tag_begins
-            assert f"<|channel|>analysis to={tool}" in tag_begins
-
-    def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
-        """Test prepare_structured_tag when original_tag is provided."""
-        original_tag = '{"custom": "tag"}'
-        result = reasoning_parser.prepare_structured_tag(original_tag, None)
-
-        # Should return the original tag unchanged
-        assert result == original_tag
-
-    def test_from_builtin_tool_to_tag(self):
-        """Test from_builtin_tool_to_tag function."""
-        tags = from_builtin_tool_to_tag("python")
-
-        assert len(tags) == 2
-        assert tags[0]["begin"] == "<|channel|>commentary to=python"
-        assert tags[0]["content"]["type"] == "any_text"
-        assert tags[0]["end"] == "<|end|>"
-
-        assert tags[1]["begin"] == "<|channel|>analysis to=python"
-        assert tags[1]["content"]["type"] == "any_text"
-        assert tags[1]["end"] == "<|end|>"
-
-    def test_tag_with_builtin_funcs(self):
-        """Test tag_with_builtin_funcs function."""
-        builtin_tools = ["browser", "python"]
-        result = tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tools)
-
-        assert result["type"] == "structural_tag"
-        # Should have original analysis tag + 2 tags per tool
-        assert len(result["format"]["tags"]) == 5  # 1 + 2*2
-
-        # Should have added commentary trigger
-        assert "<|channel|>commentary to=" in result["format"]["triggers"]
-        assert "<|channel|>analysis" in result["format"]["triggers"]
-
-    def test_tag_structure_invariants(self):
-        """Test that the basic tag structure follows expected format."""
-        # Test the base no_func_reaonsing_tag structure
-        assert no_func_reaonsing_tag["type"] == "structural_tag"
-        assert no_func_reaonsing_tag["format"]["type"] == "triggered_tags"
-        assert no_func_reaonsing_tag["format"]["stop_after_first"] is False
-
-        # Verify analysis tag structure
-        analysis_tag = no_func_reaonsing_tag["format"]["tags"][0]
-        assert analysis_tag["begin"] == "<|channel|>analysis<|message|>"
-        assert analysis_tag["content"]["type"] == "any_text"
-        assert analysis_tag["end"] == "<|end|>"
-
-    def test_json_serialization_valid(
-        self, reasoning_parser, mock_tool_server_with_all_tools
-    ):
-        """Test that all generated tags produce valid JSON."""
-        # Test with no tool server
-        result1 = reasoning_parser.prepare_structured_tag(None, None)
-        json.loads(result1)  # Should not raise
-
-        # Test with empty tool server
-        empty_server = Mock(spec=ToolServer)
-        empty_server.has_tool = Mock(return_value=False)
-        result2 = reasoning_parser.prepare_structured_tag(None, empty_server)
-        json.loads(result2)  # Should not raise
-
-        # Test with tools
-        result3 = reasoning_parser.prepare_structured_tag(
-            None, mock_tool_server_with_all_tools
-        )
-        json.loads(result3)  # Should not raise
-
-    @pytest.mark.parametrize("tool_name", ["browser", "python", "container"])
-    def test_single_tool_integration(self, reasoning_parser, tool_name):
-        """Test integration with individual tools."""
-        tool_server = Mock(spec=ToolServer)
-        tool_server.has_tool = Mock(side_effect=lambda tool: tool == tool_name)
-
-        result = reasoning_parser.prepare_structured_tag(None, tool_server)
-        parsed = json.loads(result)
-
-        # Should have 1 analysis + 2 tool-specific tags
-        assert len(parsed["format"]["tags"]) == 3
-
-        tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
-        assert f"<|channel|>commentary to={tool_name}" in tag_begins
-        assert f"<|channel|>analysis to={tool_name}" in tag_begins
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -18,7 +18,7 @@ if TYPE_CHECKING:

 logger = init_logger(__name__)

-no_func_reaonsing_tag = {
+no_func_reasoning_tag = {
    "type": "structural_tag",
    "format": {
        "type": "triggered_tags",
@@ -51,10 +51,10 @@ def from_builtin_tool_to_tag(tool: str) -> list[dict]:
    return tag


-def tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list: list[str]) -> dict:
+def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) -> dict:
    import copy

-    new_tag = copy.deepcopy(no_func_reaonsing_tag)
+    new_tag = copy.deepcopy(no_func_reasoning_tag)
    new_tag["format"]["triggers"].append("<|channel|>commentary to=")

    for tool in builtin_tool_list:
@@ -162,7 +162,7 @@ class GptOssReasoningParser(ReasoningParser):
    ) -> str | None:
        if original_tag is None:
            if tool_server is None:
-                return json.dumps(no_func_reaonsing_tag)
+                return json.dumps(no_func_reasoning_tag)
            else:
                builtin_tool_list: list[str] = []
                if tool_server.has_tool("browser"):
@@ -175,11 +175,11 @@ class GptOssReasoningParser(ReasoningParser):
                if len(builtin_tool_list) > 0:
                    logger.info("Builtin_tool_list: %s", builtin_tool_list)
                    func_tag = json.dumps(
-                        tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list)
+                        tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list)
                    )
                else:
                    logger.info("Builtin_tool_list is empty")
-                    func_tag = json.dumps(no_func_reaonsing_tag)
+                    func_tag = json.dumps(no_func_reasoning_tag)

                return func_tag
        else: