[GPT-OSS] Structural-tag support for gpt-oss tool calls in CoT reasoning (#25515)

Signed-off-by: Hanchenli <lihanc2002@gmail.com>
Signed-off-by: Hanchenli <61769611+Hanchenli@users.noreply.github.com>
Signed-off-by: Wei Wei <wwei6@meta.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Wei Wei <wwei6@meta.com>
Co-authored-by: Wei Wei <weiweinpu@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Hanchenli
2025-10-17 21:55:54 -07:00
committed by GitHub
parent c312320764
commit 7c572544e4
14 changed files with 911 additions and 32 deletions

View File

@@ -0,0 +1,172 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for GPT-OSS structural tag support in reasoning (PR #25515)."""
import json
from unittest.mock import Mock
import pytest
from vllm.entrypoints.tool_server import ToolServer
from vllm.reasoning.gptoss_reasoning_parser import (
GptOssReasoningParser,
from_builtin_tool_to_tag,
no_func_reaonsing_tag,
tag_with_builtin_funcs,
)
class TestGptOssReasoningParser:
    """Unit tests covering the structural-tag helpers of GptOssReasoningParser."""

    @pytest.fixture
    def mock_tokenizer(self):
        """Tokenizer stub whose encode() always yields a fixed id list."""
        fake = Mock()
        fake.encode = Mock(return_value=[1, 2, 3, 4, 5])
        return fake

    @pytest.fixture
    def reasoning_parser(self, mock_tokenizer):
        """Parser under test, built on the tokenizer stub."""
        return GptOssReasoningParser(mock_tokenizer)

    @pytest.fixture
    def mock_tool_server_empty(self):
        """ToolServer stub that reports every tool as unavailable."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(return_value=False)
        return server

    @pytest.fixture
    def mock_tool_server_with_browser(self):
        """ToolServer stub exposing only the browser tool."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(side_effect=lambda name: name == "browser")
        return server

    @pytest.fixture
    def mock_tool_server_with_all_tools(self):
        """ToolServer stub exposing every builtin tool."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(
            side_effect=lambda name: name in ["browser", "python", "container"]
        )
        return server

    def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
        """Without a tool server the default analysis-only tag is emitted."""
        produced = reasoning_parser.prepare_structured_tag(None, None)

        assert produced == json.dumps(no_func_reaonsing_tag)

        # Sanity-check the decoded payload's shape.
        payload = json.loads(produced)
        assert payload["type"] == "structural_tag"
        assert payload["format"]["type"] == "triggered_tags"
        assert len(payload["format"]["tags"]) == 1
        assert payload["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
        assert payload["format"]["triggers"] == ["<|channel|>analysis"]

    def test_prepare_structured_tag_with_all_tools(
        self, reasoning_parser, mock_tool_server_with_all_tools
    ):
        """Each available builtin tool contributes two tags beside analysis."""
        payload = json.loads(
            reasoning_parser.prepare_structured_tag(
                None, mock_tool_server_with_all_tools
            )
        )

        # One analysis tag plus a commentary/analysis pair per tool.
        assert len(payload["format"]["tags"]) == 7  # 1 analysis + 6 tool tags

        begins = [entry["begin"] for entry in payload["format"]["tags"]]
        for tool in ["browser", "python", "container"]:
            assert f"<|channel|>commentary to={tool}" in begins
            assert f"<|channel|>analysis to={tool}" in begins

    def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
        """A caller-supplied tag is passed through untouched."""
        original_tag = '{"custom": "tag"}'

        assert reasoning_parser.prepare_structured_tag(original_tag, None) == original_tag

    def test_from_builtin_tool_to_tag(self):
        """A builtin tool maps to one commentary tag and one analysis tag."""
        tags = from_builtin_tool_to_tag("python")
        assert len(tags) == 2

        commentary_tag, analysis_tag = tags
        assert commentary_tag["begin"] == "<|channel|>commentary to=python"
        assert commentary_tag["content"]["type"] == "any_text"
        assert commentary_tag["end"] == "<|end|>"
        assert analysis_tag["begin"] == "<|channel|>analysis to=python"
        assert analysis_tag["content"]["type"] == "any_text"
        assert analysis_tag["end"] == "<|end|>"

    def test_tag_with_builtin_funcs(self):
        """Extending the base tag adds two tags per tool and a new trigger."""
        extended = tag_with_builtin_funcs(no_func_reaonsing_tag, ["browser", "python"])

        assert extended["type"] == "structural_tag"
        # Base analysis tag plus two tags for each of the two tools.
        assert len(extended["format"]["tags"]) == 5  # 1 + 2*2
        # Both the commentary trigger and the original trigger must be present.
        assert "<|channel|>commentary to=" in extended["format"]["triggers"]
        assert "<|channel|>analysis" in extended["format"]["triggers"]

    def test_tag_structure_invariants(self):
        """The base no_func_reaonsing_tag must keep its expected shape."""
        base = no_func_reaonsing_tag
        assert base["type"] == "structural_tag"
        assert base["format"]["type"] == "triggered_tags"
        assert base["format"]["stop_after_first"] is False

        # The single builtin tag is the analysis channel.
        analysis = base["format"]["tags"][0]
        assert analysis["begin"] == "<|channel|>analysis<|message|>"
        assert analysis["content"]["type"] == "any_text"
        assert analysis["end"] == "<|end|>"

    def test_json_serialization_valid(
        self, reasoning_parser, mock_tool_server_with_all_tools
    ):
        """Every code path emits a string that json.loads accepts."""
        # No tool server at all.
        json.loads(reasoning_parser.prepare_structured_tag(None, None))

        # A tool server exposing nothing.
        bare_server = Mock(spec=ToolServer)
        bare_server.has_tool = Mock(return_value=False)
        json.loads(reasoning_parser.prepare_structured_tag(None, bare_server))

        # A tool server exposing every builtin tool.
        json.loads(
            reasoning_parser.prepare_structured_tag(
                None, mock_tool_server_with_all_tools
            )
        )

    @pytest.mark.parametrize("tool_name", ["browser", "python", "container"])
    def test_single_tool_integration(self, reasoning_parser, tool_name):
        """Each tool on its own yields the analysis tag plus its own pair."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(side_effect=lambda name: name == tool_name)

        payload = json.loads(reasoning_parser.prepare_structured_tag(None, server))

        # 1 analysis tag + 2 tags for the single enabled tool.
        assert len(payload["format"]["tags"]) == 3
        begins = [entry["begin"] for entry in payload["format"]["tags"]]
        assert f"<|channel|>commentary to={tool_name}" in begins
        assert f"<|channel|>analysis to={tool_name}" in begins

View File

@@ -0,0 +1,207 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for reasoning-aware structured output functionality (PR #25515)."""
from unittest.mock import Mock
import pytest
from vllm.config import ModelConfig, SchedulerConfig, VllmConfig
from vllm.reasoning import ReasoningParser
from vllm.v1.request import Request
from vllm.v1.structured_output import StructuredOutputManager
class TestReasoningStructuredOutput:
    """Tests for structured-output decisions that depend on reasoning state."""

    @pytest.fixture
    def mock_model_config(self):
        """ModelConfig stub carrying just the attributes manager setup reads."""
        cfg = Mock(spec=ModelConfig)
        # Skipping tokenizer init avoids any network/tokenizer download.
        cfg.skip_tokenizer_init = True
        cfg.get_vocab_size = Mock(return_value=50000)
        # Attributes consulted by the (skipped) tokenizer initialization path.
        cfg.runner_type = "generate"
        cfg.tokenizer = "test-tokenizer"
        cfg.tokenizer_mode = "auto"
        cfg.trust_remote_code = False
        cfg.tokenizer_revision = None
        return cfg

    @pytest.fixture
    def mock_scheduler_config(self):
        """SchedulerConfig stub with a fixed sequence budget."""
        cfg = Mock(spec=SchedulerConfig)
        cfg.max_num_seqs = 128
        return cfg

    @pytest.fixture
    def mock_vllm_config(self, mock_model_config, mock_scheduler_config):
        """VllmConfig stub wiring the model/scheduler stubs together."""
        cfg = Mock(spec=VllmConfig)
        cfg.model_config = mock_model_config
        cfg.scheduler_config = mock_scheduler_config
        cfg.structured_outputs_config = Mock()
        cfg.structured_outputs_config.reasoning_parser = None
        cfg.structured_outputs_config.enable_in_reasoning = False
        cfg.speculative_config = None
        return cfg

    @pytest.fixture
    def mock_reasoning_parser(self):
        """ReasoningParser stub that initially reports reasoning as ongoing."""
        stub = Mock(spec=ReasoningParser)
        stub.is_reasoning_end = Mock(return_value=False)
        return stub

    @pytest.fixture
    def mock_request_with_structured_output(self):
        """Request stub carrying an active, non-terminated grammar."""
        req = Mock(spec=Request)
        req.structured_output_request = Mock()
        req.structured_output_request.reasoning_ended = None
        req.structured_output_request.grammar = Mock()
        req.structured_output_request.grammar.is_terminated = Mock(
            return_value=False
        )
        req.use_structured_output = True
        req.prompt_token_ids = [1, 2, 3, 4, 5]
        req.all_token_ids = [1, 2, 3, 4, 5, 6, 7, 8]
        return req

    def test_should_fill_bitmask_with_enable_in_reasoning(
        self, mock_vllm_config, mock_request_with_structured_output
    ):
        """enable_in_reasoning=True forces bitmask filling unconditionally."""
        mock_vllm_config.structured_outputs_config.enable_in_reasoning = True
        manager = StructuredOutputManager(mock_vllm_config)

        assert manager.should_fill_bitmask(mock_request_with_structured_output) is True

    def test_should_fill_bitmask_without_enable_in_reasoning(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """With the flag off, filling is gated on reasoning having ended."""
        # The fixture default leaves enable_in_reasoning at False.
        assert (
            mock_vllm_config.structured_outputs_config.enable_in_reasoning is False
        )
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser
        mock_reasoning_parser.is_reasoning_end.return_value = False

        outcome = manager.should_fill_bitmask(mock_request_with_structured_output)

        # The call records the (still False) reasoning state on the request
        # and returns that same value.
        sor = mock_request_with_structured_output.structured_output_request
        assert sor.reasoning_ended is False
        assert outcome is False

    def test_should_fill_bitmask_no_reasoner(
        self, mock_vllm_config, mock_request_with_structured_output
    ):
        """Absent a reasoner, bitmask filling defaults to True."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = None

        assert manager.should_fill_bitmask(mock_request_with_structured_output) is True

    def test_should_advance_with_enable_in_reasoning(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """enable_in_reasoning=True makes should_advance unconditional."""
        mock_vllm_config.structured_outputs_config.enable_in_reasoning = True
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        assert manager.should_advance(mock_request_with_structured_output) is True

    def test_should_advance_reasoning_not_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """The grammar must not advance while reasoning is still open."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = False
        mock_reasoning_parser.is_reasoning_end.return_value = False

        assert manager.should_advance(mock_request_with_structured_output) is False

    def test_should_advance_reasoning_just_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """Reasoning ending this step is recorded but does not yet advance."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        # Reasoning was open going into this step, and the parser now
        # reports the end marker.
        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = False
        mock_reasoning_parser.is_reasoning_end.return_value = True

        outcome = manager.should_advance(mock_request_with_structured_output)

        # The transition is persisted, but this step itself returns False.
        assert sor.reasoning_ended is True
        assert outcome is False

    def test_should_advance_reasoning_already_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """Once reasoning has ended, the grammar advances normally."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = True

        assert manager.should_advance(mock_request_with_structured_output) is True