[Refactor] Consolidate GPT-OSS reasoning parser tests (#36915)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
Signed-off-by: Flora Feng <4florafeng@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1,279 +0,0 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Integration tests for GPT-OSS structural tags functionality (PR #25515)."""
|
||||
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
)
|
||||
from vllm.sampling_params import StructuredOutputsParams
|
||||
|
||||
|
||||
class TestGptOssStructuralTagsIntegration:
|
||||
"""Integration tests for structural tags in GPT-OSS tool calls."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tokenizer(self):
|
||||
"""Create a mock tokenizer."""
|
||||
tokenizer = Mock()
|
||||
tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
|
||||
tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
|
||||
return tokenizer
|
||||
|
||||
@pytest.fixture
|
||||
def gptoss_parser(self, mock_tokenizer):
|
||||
"""Create a real GptOssReasoningParser instance."""
|
||||
return GptOssReasoningParser(mock_tokenizer)
|
||||
|
||||
@pytest.fixture
|
||||
def tool_server_with_python(self):
|
||||
"""Create a tool server with Python tool enabled."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool == "python")
|
||||
return tool_server
|
||||
|
||||
@pytest.fixture
|
||||
def tool_server_empty(self):
|
||||
"""Create a tool server with no tools."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(return_value=False)
|
||||
return tool_server
|
||||
|
||||
def test_end_to_end_no_tools(self, gptoss_parser):
|
||||
"""Test end-to-end flow when no tools are available."""
|
||||
# Test the parser directly
|
||||
result = gptoss_parser.prepare_structured_tag(None, None)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Verify basic structure
|
||||
assert parsed_result["type"] == "structural_tag"
|
||||
assert parsed_result["format"]["type"] == "triggered_tags"
|
||||
assert len(parsed_result["format"]["tags"]) == 1
|
||||
|
||||
# Verify only analysis channel is allowed
|
||||
analysis_tag = parsed_result["format"]["tags"][0]
|
||||
assert analysis_tag["begin"] == "<|channel|>analysis<|message|>"
|
||||
assert analysis_tag["content"]["type"] == "any_text"
|
||||
assert analysis_tag["end"] == "<|end|>"
|
||||
|
||||
# Verify triggers
|
||||
assert parsed_result["format"]["triggers"] == ["<|channel|>analysis"]
|
||||
assert parsed_result["format"]["stop_after_first"] is False
|
||||
|
||||
def test_end_to_end_with_python_tool(self, gptoss_parser, tool_server_with_python):
|
||||
"""Test end-to-end flow with Python tool enabled."""
|
||||
result = gptoss_parser.prepare_structured_tag(None, tool_server_with_python)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Should have analysis tag + 2 python tags
|
||||
assert len(parsed_result["format"]["tags"]) == 3
|
||||
|
||||
# Verify all expected tags are present
|
||||
tag_begins = [tag["begin"] for tag in parsed_result["format"]["tags"]]
|
||||
expected_begins = [
|
||||
"<|channel|>analysis<|message|>",
|
||||
"<|channel|>commentary to=python",
|
||||
"<|channel|>analysis to=python",
|
||||
]
|
||||
|
||||
for expected in expected_begins:
|
||||
assert expected in tag_begins
|
||||
|
||||
# Verify triggers include commentary
|
||||
assert "<|channel|>analysis" in parsed_result["format"]["triggers"]
|
||||
assert "<|channel|>commentary to=" in parsed_result["format"]["triggers"]
|
||||
|
||||
def test_structured_outputs_params_integration(
|
||||
self, gptoss_parser, tool_server_with_python
|
||||
):
|
||||
"""Test integration with StructuredOutputsParams."""
|
||||
# Generate structural tag
|
||||
structural_tag = gptoss_parser.prepare_structured_tag(
|
||||
None, tool_server_with_python
|
||||
)
|
||||
|
||||
# Create StructuredOutputsParams
|
||||
params = StructuredOutputsParams(structural_tag=structural_tag)
|
||||
|
||||
# Verify the tag is properly stored and accessible
|
||||
assert params.structural_tag == structural_tag
|
||||
|
||||
# Verify the tag is valid JSON
|
||||
parsed_tag = json.loads(params.structural_tag)
|
||||
assert parsed_tag["type"] == "structural_tag"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"browser, python, container, expected_tags",
|
||||
[
|
||||
# No tools
|
||||
(False, False, False, 1),
|
||||
# Single tool
|
||||
(True, False, False, 3),
|
||||
# Multiple tools
|
||||
(True, True, False, 5),
|
||||
# All tools
|
||||
(True, True, True, 7),
|
||||
],
|
||||
)
|
||||
def test_tool_server_interaction_flow(
|
||||
self, gptoss_parser, browser, python, container, expected_tags
|
||||
):
|
||||
"""Test the complete tool server interaction flow."""
|
||||
|
||||
# Create a mock ToolServer
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
|
||||
# Simulate tool availability based on parameters
|
||||
tool_server.has_tool = Mock(
|
||||
side_effect=lambda tool: {
|
||||
"browser": browser,
|
||||
"python": python,
|
||||
"container": container,
|
||||
}.get(tool, False)
|
||||
)
|
||||
|
||||
# Run the parser and verify results
|
||||
result = gptoss_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Validate number of tags
|
||||
assert len(parsed_result["format"]["tags"]) == expected_tags
|
||||
|
||||
# Verify tool-specific tags exist for enabled tools
|
||||
tag_begins = [tag["begin"] for tag in parsed_result["format"]["tags"]]
|
||||
for tool, enabled in {
|
||||
"browser": browser,
|
||||
"python": python,
|
||||
"container": container,
|
||||
}.items():
|
||||
if enabled:
|
||||
assert f"<|channel|>commentary to={tool}" in tag_begins
|
||||
assert f"<|channel|>analysis to={tool}" in tag_begins
|
||||
|
||||
def test_original_tag_preservation(self, gptoss_parser, tool_server_with_python):
|
||||
"""Test that original tags are preserved when provided."""
|
||||
original_tag = '{"type": "custom_tag", "data": "preserved"}'
|
||||
|
||||
result = gptoss_parser.prepare_structured_tag(
|
||||
original_tag, tool_server_with_python
|
||||
)
|
||||
|
||||
# Should return original tag unchanged
|
||||
assert result == original_tag
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tools",
|
||||
[
|
||||
[],
|
||||
["browser"],
|
||||
["python"],
|
||||
["container"],
|
||||
["browser", "python"],
|
||||
["browser", "container"],
|
||||
["python", "container"],
|
||||
["browser", "python", "container"],
|
||||
],
|
||||
)
|
||||
def test_json_validity_comprehensive(self, gptoss_parser, tools):
|
||||
"""Test JSON validity across all possible tool combinations."""
|
||||
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool in tools)
|
||||
|
||||
result = gptoss_parser.prepare_structured_tag(None, tool_server)
|
||||
|
||||
# Should be valid JSON
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Should have correct structure
|
||||
assert parsed_result["type"] == "structural_tag"
|
||||
assert "format" in parsed_result
|
||||
assert "tags" in parsed_result["format"]
|
||||
assert "triggers" in parsed_result["format"]
|
||||
|
||||
# Tag count should be: 1 (analysis) + 2 * len(tools)
|
||||
expected_tag_count = 1 + (2 * len(tools))
|
||||
assert len(parsed_result["format"]["tags"]) == expected_tag_count
|
||||
|
||||
def test_error_handling_invalid_tool_server(self, gptoss_parser):
|
||||
"""Test error handling with invalid tool server."""
|
||||
# Tool server that raises exceptions
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=Exception("Tool server error"))
|
||||
|
||||
# The parser does not swallow tool-server failures: the exception propagates to the caller
|
||||
with pytest.raises(Exception, match="Tool server error"):
|
||||
gptoss_parser.prepare_structured_tag(None, tool_server)
|
||||
|
||||
def test_concurrent_requests_isolation(self, gptoss_parser):
|
||||
"""Test that concurrent requests don't interfere with each other."""
|
||||
# Simulate concurrent requests with different tool servers
|
||||
tool_server_1 = Mock(spec=ToolServer)
|
||||
tool_server_1.has_tool = Mock(side_effect=lambda tool: tool == "python")
|
||||
|
||||
tool_server_2 = Mock(spec=ToolServer)
|
||||
tool_server_2.has_tool = Mock(side_effect=lambda tool: tool == "browser")
|
||||
|
||||
# Generate tags back-to-back on the same parser instance (sequential calls, not real concurrency)
|
||||
result_1 = gptoss_parser.prepare_structured_tag(None, tool_server_1)
|
||||
result_2 = gptoss_parser.prepare_structured_tag(None, tool_server_2)
|
||||
|
||||
# Parse results
|
||||
parsed_1 = json.loads(result_1)
|
||||
parsed_2 = json.loads(result_2)
|
||||
|
||||
# Verify they have different tool configurations
|
||||
tags_1 = [tag["begin"] for tag in parsed_1["format"]["tags"]]
|
||||
tags_2 = [tag["begin"] for tag in parsed_2["format"]["tags"]]
|
||||
|
||||
# Result 1 should have python tags
|
||||
assert "<|channel|>commentary to=python" in tags_1
|
||||
assert "<|channel|>commentary to=browser" not in tags_1
|
||||
|
||||
# Result 2 should have browser tags
|
||||
assert "<|channel|>commentary to=browser" in tags_2
|
||||
assert "<|channel|>commentary to=python" not in tags_2
|
||||
|
||||
def test_tag_format_consistency(self, gptoss_parser):
|
||||
"""Test that all generated tags follow consistent format."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(
|
||||
side_effect=lambda tool: tool in ["python", "browser"]
|
||||
)
|
||||
|
||||
result = gptoss_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Verify all tags have required fields
|
||||
for tag in parsed_result["format"]["tags"]:
|
||||
assert "begin" in tag
|
||||
assert "content" in tag
|
||||
assert "end" in tag
|
||||
assert tag["content"]["type"] == "any_text"
|
||||
assert tag["end"] == "<|end|>"
|
||||
|
||||
# Verify begin format
|
||||
assert tag["begin"].startswith("<|channel|>")
|
||||
|
||||
def test_trigger_configuration(self, gptoss_parser):
|
||||
"""Test trigger configuration for different tool setups."""
|
||||
# Test with no tools
|
||||
result_no_tools = gptoss_parser.prepare_structured_tag(None, None)
|
||||
parsed_no_tools = json.loads(result_no_tools)
|
||||
assert parsed_no_tools["format"]["triggers"] == ["<|channel|>analysis"]
|
||||
|
||||
# Test with tools
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool == "python")
|
||||
|
||||
result_with_tools = gptoss_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed_with_tools = json.loads(result_with_tools)
|
||||
|
||||
expected_triggers = ["<|channel|>analysis", "<|channel|>commentary to="]
|
||||
assert set(parsed_with_tools["format"]["triggers"]) == set(expected_triggers)
|
||||
@@ -1,11 +1,19 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.reasoning.gptoss_reasoning_parser import GptOssReasoningParser
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
from_builtin_tool_to_tag,
|
||||
no_func_reasoning_tag,
|
||||
)
|
||||
|
||||
REASONING_MODEL_NAME = "openai/gpt-oss-120b"
|
||||
|
||||
@@ -142,3 +150,133 @@ def test_gptoss_is_reasoning_end(
|
||||
output_ids = gpt_oss_tokenizer.convert_tokens_to_ids(output)
|
||||
actual_is_reasoning_end = parser.is_reasoning_end(output_ids)
|
||||
assert is_reasoning_end == actual_is_reasoning_end
|
||||
|
||||
|
||||
class TestGptOssStructuralTags:
|
||||
"""Test cases for GptOssReasoningParser structural tag functionality."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tokenizer(self):
|
||||
"""Create a mock tokenizer for testing."""
|
||||
tokenizer = Mock()
|
||||
tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
|
||||
tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
|
||||
return tokenizer
|
||||
|
||||
@pytest.fixture
|
||||
def reasoning_parser(self, mock_tokenizer):
|
||||
"""Create a GptOssReasoningParser instance."""
|
||||
return GptOssReasoningParser(mock_tokenizer)
|
||||
|
||||
def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
|
||||
"""Test prepare_structured_tag with no tool server."""
|
||||
result = reasoning_parser.prepare_structured_tag(None, None)
|
||||
expected = json.dumps(no_func_reasoning_tag)
|
||||
|
||||
assert result == expected
|
||||
|
||||
# Verify the structure is correct
|
||||
parsed = json.loads(result)
|
||||
assert parsed["type"] == "structural_tag"
|
||||
assert parsed["format"]["type"] == "triggered_tags"
|
||||
assert len(parsed["format"]["tags"]) == 1
|
||||
assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
|
||||
assert parsed["format"]["triggers"] == ["<|channel|>analysis"]
|
||||
|
||||
def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
|
||||
"""Test prepare_structured_tag when original_tag is provided."""
|
||||
original_tag = '{"custom": "tag"}'
|
||||
result = reasoning_parser.prepare_structured_tag(original_tag, None)
|
||||
|
||||
# Should return the original tag unchanged
|
||||
assert result == original_tag
|
||||
|
||||
def test_from_builtin_tool_to_tag(self):
|
||||
"""Test from_builtin_tool_to_tag function."""
|
||||
tags = from_builtin_tool_to_tag("python")
|
||||
|
||||
assert len(tags) == 2
|
||||
assert tags[0]["begin"] == "<|channel|>commentary to=python"
|
||||
assert tags[0]["content"]["type"] == "any_text"
|
||||
assert tags[0]["end"] == "<|end|>"
|
||||
|
||||
assert tags[1]["begin"] == "<|channel|>analysis to=python"
|
||||
assert tags[1]["content"]["type"] == "any_text"
|
||||
assert tags[1]["end"] == "<|end|>"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tools",
|
||||
[
|
||||
[],
|
||||
["browser"],
|
||||
["python"],
|
||||
["container"],
|
||||
["browser", "python"],
|
||||
["browser", "container"],
|
||||
["python", "container"],
|
||||
["browser", "python", "container"],
|
||||
],
|
||||
)
|
||||
def test_json_validity_comprehensive(self, reasoning_parser, tools):
|
||||
"""Test JSON validity across all possible tool combinations."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool in tools)
|
||||
|
||||
result = reasoning_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
assert parsed_result["type"] == "structural_tag"
|
||||
assert "format" in parsed_result
|
||||
assert "tags" in parsed_result["format"]
|
||||
assert "triggers" in parsed_result["format"]
|
||||
|
||||
# Tag count should be: 1 (analysis) + 2 * len(tools)
|
||||
expected_tag_count = 1 + (2 * len(tools))
|
||||
assert len(parsed_result["format"]["tags"]) == expected_tag_count
|
||||
|
||||
# Verify triggers are correctly configured
|
||||
expected_triggers = ["<|channel|>analysis"]
|
||||
if tools:
|
||||
expected_triggers.append("<|channel|>commentary to=")
|
||||
assert set(parsed_result["format"]["triggers"]) == set(expected_triggers)
|
||||
|
||||
def test_no_cross_request_state_pollution(self, reasoning_parser):
|
||||
"""Test that sequential calls with different tool servers produce
|
||||
independent results, guarding against shared mutable state
|
||||
(e.g. missing deepcopy in tag_with_builtin_funcs)."""
|
||||
tool_server_1 = Mock(spec=ToolServer)
|
||||
tool_server_1.has_tool = Mock(side_effect=lambda tool: tool == "python")
|
||||
|
||||
tool_server_2 = Mock(spec=ToolServer)
|
||||
tool_server_2.has_tool = Mock(side_effect=lambda tool: tool == "browser")
|
||||
|
||||
result_1 = reasoning_parser.prepare_structured_tag(None, tool_server_1)
|
||||
result_2 = reasoning_parser.prepare_structured_tag(None, tool_server_2)
|
||||
|
||||
tags_1 = [tag["begin"] for tag in json.loads(result_1)["format"]["tags"]]
|
||||
tags_2 = [tag["begin"] for tag in json.loads(result_2)["format"]["tags"]]
|
||||
|
||||
assert "<|channel|>commentary to=python" in tags_1
|
||||
assert "<|channel|>commentary to=browser" not in tags_1
|
||||
|
||||
assert "<|channel|>commentary to=browser" in tags_2
|
||||
assert "<|channel|>commentary to=python" not in tags_2
|
||||
|
||||
def test_tag_format_consistency(self, reasoning_parser):
|
||||
"""Test that all generated tags follow consistent format,
|
||||
catching malformed tags from from_builtin_tool_to_tag."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(
|
||||
side_effect=lambda tool: tool in ["python", "browser"]
|
||||
)
|
||||
|
||||
result = reasoning_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
for tag in parsed_result["format"]["tags"]:
|
||||
assert "begin" in tag
|
||||
assert "content" in tag
|
||||
assert "end" in tag
|
||||
assert tag["content"]["type"] == "any_text"
|
||||
assert tag["end"] == "<|end|>"
|
||||
assert tag["begin"].startswith("<|channel|>")
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Unit tests for GPT-OSS structural tag support in reasoning (PR #25515)."""
|
||||
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
from_builtin_tool_to_tag,
|
||||
no_func_reaonsing_tag,
|
||||
tag_with_builtin_funcs,
|
||||
)
|
||||
|
||||
|
||||
class TestGptOssReasoningParser:
|
||||
"""Test cases for GptOssReasoningParser structural tag functionality."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tokenizer(self):
|
||||
"""Create a mock tokenizer for testing."""
|
||||
tokenizer = Mock()
|
||||
tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
|
||||
tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
|
||||
return tokenizer
|
||||
|
||||
@pytest.fixture
|
||||
def reasoning_parser(self, mock_tokenizer):
|
||||
"""Create a GptOssReasoningParser instance."""
|
||||
return GptOssReasoningParser(mock_tokenizer)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tool_server_empty(self):
|
||||
"""Create a mock ToolServer with no tools."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(return_value=False)
|
||||
return tool_server
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tool_server_with_browser(self):
|
||||
"""Create a mock ToolServer with browser tool."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool == "browser")
|
||||
return tool_server
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tool_server_with_all_tools(self):
|
||||
"""Create a mock ToolServer with all builtin tools."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(
|
||||
side_effect=lambda tool: tool in ["browser", "python", "container"]
|
||||
)
|
||||
return tool_server
|
||||
|
||||
def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
|
||||
"""Test prepare_structured_tag with no tool server."""
|
||||
result = reasoning_parser.prepare_structured_tag(None, None)
|
||||
expected = json.dumps(no_func_reaonsing_tag)
|
||||
|
||||
assert result == expected
|
||||
|
||||
# Verify the structure is correct
|
||||
parsed = json.loads(result)
|
||||
assert parsed["type"] == "structural_tag"
|
||||
assert parsed["format"]["type"] == "triggered_tags"
|
||||
assert len(parsed["format"]["tags"]) == 1
|
||||
assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
|
||||
assert parsed["format"]["triggers"] == ["<|channel|>analysis"]
|
||||
|
||||
def test_prepare_structured_tag_with_all_tools(
|
||||
self, reasoning_parser, mock_tool_server_with_all_tools
|
||||
):
|
||||
"""Test prepare_structured_tag with all builtin tools."""
|
||||
result = reasoning_parser.prepare_structured_tag(
|
||||
None, mock_tool_server_with_all_tools
|
||||
)
|
||||
parsed = json.loads(result)
|
||||
|
||||
# Should have analysis tag + tags for all 3 tools (2 tags each)
|
||||
assert len(parsed["format"]["tags"]) == 7 # 1 analysis + 6 tool tags
|
||||
|
||||
# Check all tool tags are present
|
||||
tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
|
||||
for tool in ["browser", "python", "container"]:
|
||||
assert f"<|channel|>commentary to={tool}" in tag_begins
|
||||
assert f"<|channel|>analysis to={tool}" in tag_begins
|
||||
|
||||
def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
|
||||
"""Test prepare_structured_tag when original_tag is provided."""
|
||||
original_tag = '{"custom": "tag"}'
|
||||
result = reasoning_parser.prepare_structured_tag(original_tag, None)
|
||||
|
||||
# Should return the original tag unchanged
|
||||
assert result == original_tag
|
||||
|
||||
def test_from_builtin_tool_to_tag(self):
|
||||
"""Test from_builtin_tool_to_tag function."""
|
||||
tags = from_builtin_tool_to_tag("python")
|
||||
|
||||
assert len(tags) == 2
|
||||
assert tags[0]["begin"] == "<|channel|>commentary to=python"
|
||||
assert tags[0]["content"]["type"] == "any_text"
|
||||
assert tags[0]["end"] == "<|end|>"
|
||||
|
||||
assert tags[1]["begin"] == "<|channel|>analysis to=python"
|
||||
assert tags[1]["content"]["type"] == "any_text"
|
||||
assert tags[1]["end"] == "<|end|>"
|
||||
|
||||
def test_tag_with_builtin_funcs(self):
|
||||
"""Test tag_with_builtin_funcs function."""
|
||||
builtin_tools = ["browser", "python"]
|
||||
result = tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tools)
|
||||
|
||||
assert result["type"] == "structural_tag"
|
||||
# Should have original analysis tag + 2 tags per tool
|
||||
assert len(result["format"]["tags"]) == 5 # 1 + 2*2
|
||||
|
||||
# Should have added commentary trigger
|
||||
assert "<|channel|>commentary to=" in result["format"]["triggers"]
|
||||
assert "<|channel|>analysis" in result["format"]["triggers"]
|
||||
|
||||
def test_tag_structure_invariants(self):
|
||||
"""Test that the basic tag structure follows expected format."""
|
||||
# Test the base no_func_reaonsing_tag structure
|
||||
assert no_func_reaonsing_tag["type"] == "structural_tag"
|
||||
assert no_func_reaonsing_tag["format"]["type"] == "triggered_tags"
|
||||
assert no_func_reaonsing_tag["format"]["stop_after_first"] is False
|
||||
|
||||
# Verify analysis tag structure
|
||||
analysis_tag = no_func_reaonsing_tag["format"]["tags"][0]
|
||||
assert analysis_tag["begin"] == "<|channel|>analysis<|message|>"
|
||||
assert analysis_tag["content"]["type"] == "any_text"
|
||||
assert analysis_tag["end"] == "<|end|>"
|
||||
|
||||
def test_json_serialization_valid(
|
||||
self, reasoning_parser, mock_tool_server_with_all_tools
|
||||
):
|
||||
"""Test that all generated tags produce valid JSON."""
|
||||
# Test with no tool server
|
||||
result1 = reasoning_parser.prepare_structured_tag(None, None)
|
||||
json.loads(result1) # Should not raise
|
||||
|
||||
# Test with empty tool server
|
||||
empty_server = Mock(spec=ToolServer)
|
||||
empty_server.has_tool = Mock(return_value=False)
|
||||
result2 = reasoning_parser.prepare_structured_tag(None, empty_server)
|
||||
json.loads(result2) # Should not raise
|
||||
|
||||
# Test with tools
|
||||
result3 = reasoning_parser.prepare_structured_tag(
|
||||
None, mock_tool_server_with_all_tools
|
||||
)
|
||||
json.loads(result3) # Should not raise
|
||||
|
||||
@pytest.mark.parametrize("tool_name", ["browser", "python", "container"])
|
||||
def test_single_tool_integration(self, reasoning_parser, tool_name):
|
||||
"""Test integration with individual tools."""
|
||||
tool_server = Mock(spec=ToolServer)
|
||||
tool_server.has_tool = Mock(side_effect=lambda tool: tool == tool_name)
|
||||
|
||||
result = reasoning_parser.prepare_structured_tag(None, tool_server)
|
||||
parsed = json.loads(result)
|
||||
|
||||
# Should have 1 analysis + 2 tool-specific tags
|
||||
assert len(parsed["format"]["tags"]) == 3
|
||||
|
||||
tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
|
||||
assert f"<|channel|>commentary to={tool_name}" in tag_begins
|
||||
assert f"<|channel|>analysis to={tool_name}" in tag_begins
|
||||
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
no_func_reaonsing_tag = {
|
||||
no_func_reasoning_tag = {
|
||||
"type": "structural_tag",
|
||||
"format": {
|
||||
"type": "triggered_tags",
|
||||
@@ -51,10 +51,10 @@ def from_builtin_tool_to_tag(tool: str) -> list[dict]:
|
||||
return tag
|
||||
|
||||
|
||||
def tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list: list[str]) -> dict:
|
||||
def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) -> dict:
|
||||
import copy
|
||||
|
||||
new_tag = copy.deepcopy(no_func_reaonsing_tag)
|
||||
new_tag = copy.deepcopy(no_func_reasoning_tag)
|
||||
new_tag["format"]["triggers"].append("<|channel|>commentary to=")
|
||||
|
||||
for tool in builtin_tool_list:
|
||||
@@ -162,7 +162,7 @@ class GptOssReasoningParser(ReasoningParser):
|
||||
) -> str | None:
|
||||
if original_tag is None:
|
||||
if tool_server is None:
|
||||
return json.dumps(no_func_reaonsing_tag)
|
||||
return json.dumps(no_func_reasoning_tag)
|
||||
else:
|
||||
builtin_tool_list: list[str] = []
|
||||
if tool_server.has_tool("browser"):
|
||||
@@ -175,11 +175,11 @@ class GptOssReasoningParser(ReasoningParser):
|
||||
if len(builtin_tool_list) > 0:
|
||||
logger.info("Builtin_tool_list: %s", builtin_tool_list)
|
||||
func_tag = json.dumps(
|
||||
tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list)
|
||||
tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list)
|
||||
)
|
||||
else:
|
||||
logger.info("Builtin_tool_list is empty")
|
||||
func_tag = json.dumps(no_func_reaonsing_tag)
|
||||
func_tag = json.dumps(no_func_reasoning_tag)
|
||||
|
||||
return func_tag
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user