fix(glm47): improve tool call parsing and content normalization (#37386)

Signed-off-by: karanb192 <karan@example.com>
Co-authored-by: karanb192 <karan@example.com>
2026-03-18 13:42:21 +05:30
parent 8c31f47c63
commit fad09e8a1f
4 changed files with 193 additions and 6 deletions
--- a/tests/tool_parsers/test_glm47_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm47_moe_tool_parser.py
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+"""Tests for the GLM-4.7 tool call parser."""
+
+import json
+from unittest.mock import Mock
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+    FunctionDefinition,
+)
+from vllm.tokenizers import get_tokenizer
+from vllm.tool_parsers.glm47_moe_tool_parser import Glm47MoeModelToolParser
+
+MODEL = "zai-org/GLM-4.5"  # NOTE(review): GLM-4.5 tokenizer used for the GLM-4.7 parser tests -- presumably shared; confirm
+
+
+@pytest.fixture(scope="module")  # module scope: created once and shared by every test in this file
+def glm47_tokenizer():
+    return get_tokenizer(tokenizer_name=MODEL)
+
+
+@pytest.fixture
+def glm47_tool_parser(glm47_tokenizer) -> Glm47MoeModelToolParser:  # default (function) scope: a new parser per test
+    return Glm47MoeModelToolParser(glm47_tokenizer)
+
+
+@pytest.fixture
+def mock_request() -> ChatCompletionRequest:
+    request = Mock(spec=ChatCompletionRequest)  # spec=: reading an attribute the real class lacks raises AttributeError
+    request.tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(name="get_current_date", parameters={}),  # tool declared with empty parameters
+        ),
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",  # tool with two string properties: city and date
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string"},
+                        "date": {"type": "string"},
+                    },
+                },
+            ),
+        ),
+    ]
+    request.tool_choice = "auto"
+    return request
+
+
+class TestGlm47ExtractToolCalls:
+    def test_no_tool_call(self, glm47_tool_parser, mock_request):
+        out = "This is a plain response."
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert not r.tools_called
+        assert r.content == out  # plain text is expected back unchanged as content
+
+    def test_zero_arg_inline(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_current_date</tool_call>"  # name only, no <arg_key> section
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.tool_calls[0].function.name == "get_current_date"
+        assert json.loads(r.tool_calls[0].function.arguments) == {}
+        assert r.content is None  # nothing precedes the call: content is expected as None, not ""
+
+    def test_zero_arg_newline(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_current_date\n</tool_call>"  # name followed by a newline before the closing tag
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.tool_calls[0].function.name == "get_current_date"
+
+    def test_args_same_line(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>"
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert json.loads(r.tool_calls[0].function.arguments) == {"city": "Beijing"}
+
+    def test_args_with_newlines(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>Beijing</arg_value>\n</tool_call>"
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert json.loads(r.tool_calls[0].function.arguments) == {"city": "Beijing"}
+
+    def test_content_before(self, glm47_tool_parser, mock_request):
+        out = "Checking.<tool_call>get_current_date</tool_call>"
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.content == "Checking."  # text before the first <tool_call> is expected as content
+
+    def test_multiple(self, glm47_tool_parser, mock_request):
+        out = (
+            "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>"
+            "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Shanghai</arg_value></tool_call>"
+        )
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert len(r.tool_calls) == 2  # only the count is checked here, not each call's arguments
+
+    def test_empty_content_none(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_current_date</tool_call>"  # same input as test_zero_arg_inline
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.content is None
+
+    def test_whitespace_content_none(self, glm47_tool_parser, mock_request):
+        out = "  \n  <tool_call>get_current_date</tool_call>"  # whitespace-only text before the call
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.content is None
+
+
+def _reset(parser: Glm47MoeModelToolParser) -> None:  # set the attributes below back to empty values before a streaming test
+    parser._buffer = ""
+    parser._in_tool_call = False
+    parser.current_tool_name_sent = False
+    parser._current_tool_name = None
+    parser._pending_key = None
+    parser._streaming_string_value = False
+    parser.prev_tool_call_arr = []
+    parser.current_tool_id = -1
+    parser.streamed_args_for_tool = []
+    parser._tool_call_ids = []
+    parser._args_started = []
+    parser._args_closed = []
+    parser._seen_keys = []  # NOTE(review): the glm47_tool_parser fixture is function-scoped; confirm this reset is still needed
+
+
+class TestGlm47Streaming:
+    def test_no_args(self, glm47_tool_parser, mock_request):
+        _reset(glm47_tool_parser)
+        for chunk in ["<tool_call>", "get_current_date", "</tool_call>"]:
+            glm47_tool_parser.extract_tool_calls_streaming(
+                previous_text="",
+                current_text="",
+                delta_text=chunk,
+                previous_token_ids=[],
+                current_token_ids=[],
+                delta_token_ids=[],
+                request=mock_request,
+            )
+        assert len(glm47_tool_parser.prev_tool_call_arr) >= 1  # only checks that a call was recorded, not its name
+
+    def test_with_args(self, glm47_tool_parser, mock_request):
+        _reset(glm47_tool_parser)
+        # Feed the value, its closing tag, and the tool-call closing tag as
+        # separate deltas so the incremental string streaming path handles
+        # each one.  Only delta_text carries data; the other inputs stay empty.
+        for chunk in [
+            "<tool_call>",
+            "get_weather\n",
+            "<arg_key>city</arg_key>",
+            "<arg_value>",
+            "Beijing",
+            "</arg_value>",
+            "</tool_call>",
+        ]:
+            glm47_tool_parser.extract_tool_calls_streaming(
+                previous_text="",
+                current_text="",
+                delta_text=chunk,
+                previous_token_ids=[],
+                current_token_ids=[],
+                delta_token_ids=[],
+                request=mock_request,
+            )
+        assert glm47_tool_parser.prev_tool_call_arr[0]["arguments"]["city"] == "Beijing"
--- a/tests/tool_parsers/test_glm4_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py
@@ -107,7 +107,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
                    )
                )
            ],
-            "",
+            None,
        ),
        (
            """<tool_call>get_current_weather
@@ -152,7 +152,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
                    )
                ),
            ],
-            "",
+            None,
        ),
        (
            """I'll help you check the weather. <tool_call>get_current_weather
@@ -202,7 +202,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
                    )
                )
            ],
-            "",
+            None,
        ),
        (
            """I will help you get the weather.<tool_call>get_weather
--- a/vllm/tool_parsers/glm47_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm47_moe_tool_parser.py
@@ -1,6 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+GLM-4.7 Tool Call Parser.

+GLM-4.7 uses a slightly different tool call format compared to GLM-4.5:
+  - The function name may be followed directly by the first ``<arg_key>``
+    on the same line, with no newline separator in between.
+  - Tool calls may have zero arguments
+    (e.g. ``<tool_call>func</tool_call>``).
+
+This parser overrides the parent regex patterns to handle both formats.
+"""

 import regex as re

@@ -14,10 +24,14 @@ logger = init_logger(__name__)
 class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
     def __init__(self, tokenizer: TokenizerLike):
         super().__init__(tokenizer)
+        # GLM-4.7 format: <tool_call>func_name[<arg_key>...]*</tool_call>
+        # The name may be followed by whitespace/newline or directly by <arg_key>;
+        # the arg section is optional so zero-argument calls match.  Group 2 is
+        # greedy (.*): NOTE(review) assumes one tool call per input -- confirm.
         self.func_detail_regex = re.compile(
-            r"<tool_call>(.*?)(<arg_key>.*?)?</tool_call>", re.DOTALL
+            r"<tool_call>\s*(\S+?)\s*(<arg_key>.*)?</tool_call>", re.DOTALL
         )
         self.func_arg_regex = re.compile(
-            r"<arg_key>(.*?)</arg_key>(?:\\n|\s)*<arg_value>(.*?)</arg_value>",
+            r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
             re.DOTALL,
         )
--- a/vllm/tool_parsers/glm4_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm4_moe_tool_parser.py
@@ -206,7 +206,12 @@ class Glm4MoeModelToolParser(ToolParser):
            )
        else:
            if len(tool_calls) > 0:
-                content = model_output[: model_output.find(self.tool_calls_start_token)]
+                content: str | None = model_output[
+                    : model_output.find(self.tool_calls_start_token)
+                ]
+                # Normalize empty/whitespace-only content to None
+                if not content or not content.strip():
+                    content = None
                return ExtractedToolCallInformation(
                    tools_called=True, tool_calls=tool_calls, content=content
                )