fix(glm47): improve tool call parsing and content normalization (#37386)

Signed-off-by: karanb192 <karan@example.com>
Co-authored-by: karanb192 <karan@example.com>
This commit is contained in:
Karan Bansal
2026-03-18 13:42:21 +05:30
committed by GitHub
parent 8c31f47c63
commit fad09e8a1f
4 changed files with 193 additions and 6 deletions

View File

@@ -0,0 +1,168 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: E501
"""Tests for the GLM-4.7 tool call parser."""
import json
from unittest.mock import Mock
import pytest
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
FunctionDefinition,
)
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.glm47_moe_tool_parser import Glm47MoeModelToolParser
# Checkpoint name handed to get_tokenizer() by the glm47_tokenizer fixture.
# NOTE(review): these tests target the GLM-4.7 parser but load a GLM-4.5
# checkpoint — presumably the tool-call tokens are shared; confirm.
MODEL = "zai-org/GLM-4.5"
@pytest.fixture(scope="module")
def glm47_tokenizer():
    """Load the tokenizer named by ``MODEL`` once per test module."""
    tokenizer = get_tokenizer(tokenizer_name=MODEL)
    return tokenizer
@pytest.fixture
def glm47_tool_parser(glm47_tokenizer):
    """Build a ``Glm47MoeModelToolParser`` around the module-scoped tokenizer."""
    parser = Glm47MoeModelToolParser(glm47_tokenizer)
    return parser
@pytest.fixture
def mock_request() -> ChatCompletionRequest:
    """Return a mocked chat request exposing two tools and ``tool_choice="auto"``.

    The tools are ``get_current_date`` (empty parameter schema) and
    ``get_weather`` (object schema with string ``city`` and ``date``
    properties).
    """
    date_tool = ChatCompletionToolsParam(
        function=FunctionDefinition(name="get_current_date", parameters={}),
    )

    weather_schema = {
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "date": {"type": "string"},
        },
    }
    weather_tool = ChatCompletionToolsParam(
        function=FunctionDefinition(
            name="get_weather",
            parameters=weather_schema,
        ),
    )

    fake_request = Mock(spec=ChatCompletionRequest)
    fake_request.tools = [date_tool, weather_tool]
    fake_request.tool_choice = "auto"
    return fake_request
class TestGlm47ExtractToolCalls:
    """Non-streaming ``extract_tool_calls`` checks for the GLM-4.7 parser.

    Every test feeds one complete model output string and inspects the
    returned ``tools_called`` / ``tool_calls`` / ``content`` fields.
    """

    def test_no_tool_call(self, glm47_tool_parser, mock_request):
        """Plain text yields no tool calls and is returned as content."""
        out = "This is a plain response."
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert not r.tools_called
        assert r.content == out

    def test_zero_arg_inline(self, glm47_tool_parser, mock_request):
        """A zero-argument call with no separator before the closing tag."""
        out = "<tool_call>get_current_date</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        assert r.tool_calls[0].function.name == "get_current_date"
        assert json.loads(r.tool_calls[0].function.arguments) == {}
        assert r.content is None

    def test_zero_arg_newline(self, glm47_tool_parser, mock_request):
        """A zero-argument call whose name is followed by a newline."""
        out = "<tool_call>get_current_date\n</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        assert r.tool_calls[0].function.name == "get_current_date"
        # The trailing newline must not leak into the arguments.
        assert json.loads(r.tool_calls[0].function.arguments) == {}

    def test_args_same_line(self, glm47_tool_parser, mock_request):
        """Arguments directly follow the function name with no separator."""
        out = "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        # Guard against the name swallowing part of the argument markup.
        assert r.tool_calls[0].function.name == "get_weather"
        assert json.loads(r.tool_calls[0].function.arguments) == {"city": "Beijing"}

    def test_args_with_newlines(self, glm47_tool_parser, mock_request):
        """Name, key and value are each separated by newlines."""
        out = "<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>Beijing</arg_value>\n</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        assert r.tool_calls[0].function.name == "get_weather"
        assert json.loads(r.tool_calls[0].function.arguments) == {"city": "Beijing"}

    def test_content_before(self, glm47_tool_parser, mock_request):
        """Text preceding the first tool call is surfaced as content."""
        out = "Checking.<tool_call>get_current_date</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        assert r.content == "Checking."

    def test_multiple(self, glm47_tool_parser, mock_request):
        """Two back-to-back calls are parsed separately, in order."""
        out = (
            "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>"
            "<tool_call>get_weather<arg_key>city</arg_key><arg_value>Shanghai</arg_value></tool_call>"
        )
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.tools_called
        assert len(r.tool_calls) == 2
        # A bare count would still pass if the two calls were merged,
        # duplicated or reordered, so pin each call's name and arguments.
        assert [tc.function.name for tc in r.tool_calls] == [
            "get_weather",
            "get_weather",
        ]
        assert [json.loads(tc.function.arguments) for tc in r.tool_calls] == [
            {"city": "Beijing"},
            {"city": "Shanghai"},
        ]

    def test_empty_content_none(self, glm47_tool_parser, mock_request):
        """No text before the tool call gives ``content is None``."""
        out = "<tool_call>get_current_date</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.content is None

    def test_whitespace_content_none(self, glm47_tool_parser, mock_request):
        """Whitespace-only text before the tool call also gives ``None``."""
        out = " \n <tool_call>get_current_date</tool_call>"
        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
        assert r.content is None
def _reset(parser):
    """Overwrite the streaming-state attributes on ``parser`` with blank values.

    Attributes are assigned in a fixed order. Each list-valued attribute
    receives its own new empty list, because the table below is rebuilt on
    every call.
    """
    blank_state = (
        ("_buffer", ""),
        ("_in_tool_call", False),
        ("current_tool_name_sent", False),
        ("_current_tool_name", None),
        ("_pending_key", None),
        ("_streaming_string_value", False),
        ("prev_tool_call_arr", []),
        ("current_tool_id", -1),
        ("streamed_args_for_tool", []),
        ("_tool_call_ids", []),
        ("_args_started", []),
        ("_args_closed", []),
        ("_seen_keys", []),
    )
    for attr_name, blank_value in blank_state:
        setattr(parser, attr_name, blank_value)
class TestGlm47Streaming:
    """Streaming extraction checks driven by hand-split text deltas."""

    @staticmethod
    def _feed(parser, request, deltas):
        """Send each delta to ``extract_tool_calls_streaming`` in order.

        Only ``delta_text`` varies between calls; the previous/current text
        and all token-id arguments are passed empty every time.
        """
        for delta in deltas:
            parser.extract_tool_calls_streaming(
                previous_text="",
                current_text="",
                delta_text=delta,
                previous_token_ids=[],
                current_token_ids=[],
                delta_token_ids=[],
                request=request,
            )

    def test_no_args(self, glm47_tool_parser, mock_request):
        """A zero-argument call split over three deltas registers a tool call."""
        _reset(glm47_tool_parser)
        deltas = ["<tool_call>", "get_current_date", "</tool_call>"]
        self._feed(glm47_tool_parser, mock_request, deltas)
        assert len(glm47_tool_parser.prev_tool_call_arr) >= 1

    def test_with_args(self, glm47_tool_parser, mock_request):
        """A string argument streamed piecewise ends up in the recorded call."""
        _reset(glm47_tool_parser)
        # The value, its closing tag and the tool-call closing tag arrive in
        # separate deltas so the incremental string streaming path handles
        # each of them in its own call.
        deltas = [
            "<tool_call>",
            "get_weather\n",
            "<arg_key>city</arg_key>",
            "<arg_value>",
            "Beijing",
            "</arg_value>",
            "</tool_call>",
        ]
        self._feed(glm47_tool_parser, mock_request, deltas)
        assert glm47_tool_parser.prev_tool_call_arr[0]["arguments"]["city"] == "Beijing"

View File

@@ -107,7 +107,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
)
)
],
"",
None,
),
(
"""<tool_call>get_current_weather
@@ -152,7 +152,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
)
),
],
"",
None,
),
(
"""I'll help you check the weather. <tool_call>get_current_weather
@@ -202,7 +202,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request):
)
)
],
"",
None,
),
(
"""I will help you get the weather.<tool_call>get_weather

View File

@@ -1,6 +1,16 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
GLM-4.7 Tool Call Parser.
GLM-4.7 uses a slightly different tool call format compared to GLM-4.5:
- The function name may appear on the same line as ``<tool_call>`` without
a newline separator before the first ``<arg_key>``.
- Tool calls may have zero arguments
(e.g. ``<tool_call>func</tool_call>``).
This parser overrides the parent regex patterns to handle both formats.
"""
import regex as re
@@ -14,10 +24,14 @@ logger = init_logger(__name__)
class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
# Subclass of Glm4MoeModelToolParser. As far as this view shows, it only
# replaces two compiled patterns after the parent initializer has run:
# func_detail_regex and func_arg_regex.
def __init__(self, tokenizer: TokenizerLike):
# Run the parent setup first, then overwrite the two regex attributes.
super().__init__(tokenizer)
# GLM-4.7 format: <tool_call>func_name[<arg_key>...]*</tool_call>
# The function name can be followed by a newline, whitespace, or
# directly by <arg_key> tags (no separator). The arg section is
# optional so that zero-argument calls are supported.
# Group 1 captures the function name; group 2 (optional) captures the
# text from the first <arg_key> up to the closing </tool_call>.
self.func_detail_regex = re.compile(
# NOTE(review): two pattern lines follow with no joining operator. This
# reads like a rendered diff (removed line first, replacement second) —
# confirm against the real file which one is live.
r"<tool_call>(.*?)(<arg_key>.*?)?</tool_call>", re.DOTALL
r"<tool_call>\s*(\S+?)\s*(<arg_key>.*)?</tool_call>", re.DOTALL
)
# Captures one key/value pair: group 1 is the key text, group 2 the value
# text.
self.func_arg_regex = re.compile(
# NOTE(review): again two pattern lines. The first also accepts a literal
# backslash-n between the tags; the second accepts only whitespace —
# presumably the second replaces the first; confirm.
r"<arg_key>(.*?)</arg_key>(?:\\n|\s)*<arg_value>(.*?)</arg_value>",
r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
re.DOTALL,
)

View File

@@ -206,7 +206,12 @@ class Glm4MoeModelToolParser(ToolParser):
)
else:
if len(tool_calls) > 0:
content = model_output[: model_output.find(self.tool_calls_start_token)]
content: str | None = model_output[
: model_output.find(self.tool_calls_start_token)
]
# Normalize empty/whitespace-only content to None
if not content or not content.strip():
content = None
return ExtractedToolCallInformation(
tools_called=True, tool_calls=tool_calls, content=content
)