[Tool Parser][2/3] Use self.tools instead of request.tools in tool parsers (#38189)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-31 01:41:36 -04:00
committed by GitHub
parent 44eef0ca1e
commit d53cb9cb8e
16 changed files with 113 additions and 105 deletions

View File

@@ -27,14 +27,8 @@ def glm4_moe_tokenizer():
@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer):
    """Build a GLM-4-MoE tool parser from the shared tokenizer fixture.

    NOTE(review): this is the pre-change fixture shown in the diff — it
    constructs the parser without tool definitions (the parser then relied
    on request.tools at call time).
    """
    return Glm4MoeModelToolParser(glm4_moe_tokenizer)
@pytest.fixture
def mock_request() -> ChatCompletionRequest:
request = Mock(spec=ChatCompletionRequest)
request.tools = [ # GLM45 parser needs this attribute to enable tool parsing.
def sample_tools():
return [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="get_weather",
@@ -42,6 +36,17 @@ def mock_request() -> ChatCompletionRequest:
),
),
]
@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools):
    """Build a GLM-4-MoE tool parser with tool schemas supplied up front.

    Per this commit, the parser reads ``self.tools`` (passed at
    construction) instead of ``request.tools``, so the fixture now wires
    the ``sample_tools`` fixture into the constructor.
    """
    return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools)
@pytest.fixture
def mock_request(sample_tools) -> ChatCompletionRequest:
    """Mock ChatCompletionRequest carrying the sample tool definitions.

    ``spec=ChatCompletionRequest`` makes the mock reject attribute access
    that the real request type does not expose; ``tools`` is set explicitly
    because tests (and, pre-change, the parser) read it.
    """
    request = Mock(spec=ChatCompletionRequest)
    request.tools = sample_tools
    return request
@@ -671,14 +676,13 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
assert '"' in parsed["message"] or "world" in parsed["message"]
def test_streaming_long_content_incremental(glm4_moe_tool_parser):
def test_streaming_long_content_incremental(glm4_moe_tokenizer):
"""Test incremental streaming of long content (Issue #32829).
This is the core fix: for long string values like code (4000+ chars),
the parser should stream incrementally rather than buffering until
complete. This test verifies we get many fragments, not just 1-3.
"""
_reset_streaming_state(glm4_moe_tool_parser)
# Bubble sort example from Issue #32829 - realistic long content
bubble_sort_code = '''#!/usr/bin/env python3
@@ -705,27 +709,28 @@ if __name__ == "__main__":
sorted_arr = bubble_sort(test_arr.copy())
print(f"Sorted: {sorted_arr}")'''
# Create a request with tool schema to enable string type detection
# Create tools with schema to enable string type detection
# This is required for incremental streaming of string values
tools = [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="write_to_file",
parameters={
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
),
),
]
glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
request = ChatCompletionRequest(
model=MODEL,
messages=[],
tools=[
{
"type": "function",
"function": {
"name": "write_to_file",
"parameters": {
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
},
}
],
) # type: ignore
tools=tools,
)
# Simulate token-based streaming (special tags as single tokens)
chunks = [