From 0de53339894ef2cef20512e31b4b8e0d83dcb6de Mon Sep 17 00:00:00 2001 From: Robin Nabel Date: Tue, 24 Feb 2026 14:27:42 +0000 Subject: [PATCH] Fix GLM4 parser tests (#34905) Signed-off-by: Robin Nabel Co-authored-by: Chauncey --- .../tool_parsers/test_glm4_moe_tool_parser.py | 112 +++++++++++------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/tests/tool_parsers/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py index b5b597798..292714cde 100644 --- a/tests/tool_parsers/test_glm4_moe_tool_parser.py +++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py @@ -1,19 +1,22 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -# ruff: noqa: E501 import json +from unittest.mock import Mock import pytest -from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, + FunctionDefinition, +) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.glm4_moe_tool_parser import ( Glm4MoeModelToolParser, ) -pytest.skip("skip glm4_moe parser test", allow_module_level=True) # Use a common model that is likely to be available MODEL = "zai-org/GLM-4.5" @@ -28,6 +31,20 @@ def glm4_moe_tool_parser(glm4_moe_tokenizer): return Glm4MoeModelToolParser(glm4_moe_tokenizer) +@pytest.fixture +def mock_request() -> ChatCompletionRequest: + request = Mock(spec=ChatCompletionRequest) + request.tools = [ # GLM45 parser needs this attribute to enable tool parsing. + ChatCompletionToolsParam( + function=FunctionDefinition( + name="get_weather", + parameters={"city": {"type": "string"}}, + ), + ), + ] + return request + + def assert_tool_calls( actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall] ): @@ -47,10 +64,10 @@ def assert_tool_calls( assert actual_args == expected_args -def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): +def test_extract_tool_calls_no_tools(glm4_moe_tool_parser, mock_request): model_output = "This is a test" extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert not extracted_tool_calls.tools_called assert extracted_tool_calls.tool_calls == [] @@ -90,7 +107,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): ) ) ], - None, + "", ), ( """get_current_weather @@ -135,7 +152,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): ) ), ], - None, + "", ), ( """I'll help you check the weather. get_current_weather @@ -160,7 +177,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): ) ) ], - "I'll help you check the weather.", + "I'll help you check the weather. ", ), ( """get_current_weather @@ -185,7 +202,7 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): ) ) ], - None, + "", ), ( """I will help you get the weather.get_weather @@ -212,10 +229,14 @@ def test_extract_tool_calls_no_tools(glm4_moe_tool_parser): ], ) def test_extract_tool_calls( - glm4_moe_tool_parser, model_output, expected_tool_calls, expected_content + glm4_moe_tool_parser, + mock_request, + model_output, + expected_tool_calls, + expected_content, ): extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls) @@ -223,7 +244,7 @@ def test_extract_tool_calls( assert extracted_tool_calls.content == expected_content -def test_extract_tool_calls_with_thinking_tags(glm4_moe_tool_parser): +def test_extract_tool_calls_with_thinking_tags(glm4_moe_tool_parser, mock_request): """Test tool extraction when thinking tags are present.""" model_output = """I want to get the weather. @@ -236,7 +257,7 @@ I will help you get the weather. """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called @@ -245,11 +266,12 @@ I will help you get the weather. expected_content = """I want to get the weather. -I will help you get the weather.""" +I will help you get the weather. +""" assert extracted_tool_calls.content == expected_content -def test_extract_tool_calls_malformed_xml(glm4_moe_tool_parser): +def test_extract_tool_calls_malformed_xml(glm4_moe_tool_parser, mock_request): """Test that malformed XML is handled gracefully.""" model_output = """get_weather city @@ -259,7 +281,7 @@ def test_extract_tool_calls_malformed_xml(glm4_moe_tool_parser): """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] # Should handle malformed XML gracefully @@ -269,13 +291,13 @@ def test_extract_tool_calls_malformed_xml(glm4_moe_tool_parser): assert isinstance(extracted_tool_calls.tool_calls, list) -def test_extract_tool_calls_empty_arguments(glm4_moe_tool_parser): +def test_extract_tool_calls_empty_arguments(glm4_moe_tool_parser, mock_request): """Test tool calls with no arguments.""" model_output = """get_current_time """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called @@ -285,7 +307,7 @@ def test_extract_tool_calls_empty_arguments(glm4_moe_tool_parser): assert extracted_tool_calls.tool_calls[0].function.arguments == "{}" -def test_extract_tool_calls_mixed_content(glm4_moe_tool_parser): +def test_extract_tool_calls_mixed_content(glm4_moe_tool_parser, mock_request): """Test extraction with mixed content and multiple tool calls.""" model_output = """I will help you get the weather info. @@ -306,7 +328,7 @@ meaningwhile, I will also check the weather in Shanghai. """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called @@ -325,10 +347,10 @@ meaningwhile, I will also check the weather in Shanghai. assert args2["date"] == "2025-08-01" # Content should be everything before the first tool call - assert extracted_tool_calls.content == "I will help you get the weather info." + assert extracted_tool_calls.content == "I will help you get the weather info.\n\n" -def test_streaming_basic_functionality(glm4_moe_tool_parser): +def test_streaming_basic_functionality(glm4_moe_tool_parser, mock_request): """Test basic streaming functionality.""" # Reset streaming state glm4_moe_tool_parser.current_tool_name_sent = False @@ -353,7 +375,7 @@ def test_streaming_basic_functionality(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[tool_call_start_id, tool_call_end_id], delta_token_ids=[tool_call_end_id], - request=None, + request=mock_request, ) # The result behavior depends on the streaming state @@ -361,7 +383,7 @@ def test_streaming_basic_functionality(glm4_moe_tool_parser): assert result is None or hasattr(result, "tool_calls") or hasattr(result, "content") -def test_streaming_no_tool_calls(glm4_moe_tool_parser): +def test_streaming_no_tool_calls(glm4_moe_tool_parser, mock_request): """Test streaming when there are no tool calls.""" current_text = "This is just regular text without any tool calls." @@ -372,7 +394,7 @@ def test_streaming_no_tool_calls(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # Should return the delta text as content @@ -381,7 +403,7 @@ def test_streaming_no_tool_calls(glm4_moe_tool_parser): assert result.content == " without any tool calls." -def test_streaming_with_content_before_tool_calls(glm4_moe_tool_parser): +def test_streaming_with_content_before_tool_calls(glm4_moe_tool_parser, mock_request): """Test streaming when there's content before tool calls.""" # Reset streaming state glm4_moe_tool_parser.current_tool_name_sent = False @@ -398,16 +420,16 @@ def test_streaming_with_content_before_tool_calls(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # Should return content when no tool call tokens are detected assert result is not None assert hasattr(result, "content") - assert result.content == "get the weather." + assert result.content == "get the weather." -def test_extract_tool_calls_special_characters(glm4_moe_tool_parser): +def test_extract_tool_calls_special_characters(glm4_moe_tool_parser, mock_request): """Test tool calls with special characters and unicode.""" model_output = """send_message recipient @@ -419,7 +441,7 @@ def test_extract_tool_calls_special_characters(glm4_moe_tool_parser): """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called @@ -432,7 +454,7 @@ def test_extract_tool_calls_special_characters(glm4_moe_tool_parser): assert args["priority"] == "high" -def test_extract_tool_calls_incomplete_tool_call(glm4_moe_tool_parser): +def test_extract_tool_calls_incomplete_tool_call(glm4_moe_tool_parser, mock_request): """Test incomplete tool calls (missing closing tag).""" model_output = """get_weather city @@ -441,7 +463,7 @@ def test_extract_tool_calls_incomplete_tool_call(glm4_moe_tool_parser): 2025-08-01""" extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] # Incomplete tool calls should not be extracted @@ -467,7 +489,7 @@ def _reset_streaming_state(parser): parser._seen_keys = [] -def test_streaming_incremental_string_value(glm4_moe_tool_parser): +def test_streaming_incremental_string_value(glm4_moe_tool_parser, mock_request): """Test incremental streaming of string argument values.""" _reset_streaming_state(glm4_moe_tool_parser) @@ -492,7 +514,7 @@ def test_streaming_incremental_string_value(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) if result is not None and hasattr(result, "tool_calls") and result.tool_calls: for tc in result.tool_calls: @@ -516,7 +538,7 @@ def test_streaming_incremental_string_value(glm4_moe_tool_parser): assert "get_weather" in combined or "name:get_weather" in combined -def test_streaming_empty_tool_call(glm4_moe_tool_parser): +def test_streaming_empty_tool_call(glm4_moe_tool_parser, mock_request): """Test that empty tool calls don't cause infinite loops.""" _reset_streaming_state(glm4_moe_tool_parser) @@ -528,7 +550,7 @@ def test_streaming_empty_tool_call(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # Should not hang and should return something (None or content) @@ -538,7 +560,7 @@ def test_streaming_empty_tool_call(glm4_moe_tool_parser): assert glm4_moe_tool_parser.current_tool_id == -1 -def test_streaming_prev_tool_call_arr_finalization(glm4_moe_tool_parser): +def test_streaming_prev_tool_call_arr_finalization(glm4_moe_tool_parser, mock_request): """Test that prev_tool_call_arr contains parsed dict after tool call.""" _reset_streaming_state(glm4_moe_tool_parser) @@ -558,7 +580,7 @@ def test_streaming_prev_tool_call_arr_finalization(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # After the tool call completes, prev_tool_call_arr should have parsed dict @@ -571,7 +593,7 @@ def test_streaming_prev_tool_call_arr_finalization(glm4_moe_tool_parser): assert args.get("city") == "Beijing" -def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser): +def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser, mock_request): """Test streaming multiple sequential tool calls.""" _reset_streaming_state(glm4_moe_tool_parser) @@ -595,7 +617,7 @@ def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # Should have two tool calls in prev_tool_call_arr @@ -604,7 +626,7 @@ def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser): assert glm4_moe_tool_parser.prev_tool_call_arr[1]["arguments"]["city"] == "Shanghai" -def test_streaming_json_escape_in_string(glm4_moe_tool_parser): +def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request): """Test that special characters in string values are properly escaped.""" _reset_streaming_state(glm4_moe_tool_parser) @@ -624,7 +646,7 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser): previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=None, + request=mock_request, ) # The streamed_args_for_tool should contain valid JSON @@ -691,7 +713,7 @@ if __name__ == "__main__": }, } ], - ) + ) # type: ignore # Simulate token-based streaming (special tags as single tokens) chunks = [ @@ -746,7 +768,7 @@ if __name__ == "__main__": assert "def bubble_sort" in parsed["content"] -def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser): +def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_request): """Test that numeric arguments are deserialized as numbers, not strings.""" model_output = """calculate operation @@ -760,7 +782,7 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser): """ extracted_tool_calls = glm4_moe_tool_parser.extract_tool_calls( - model_output, request=None + model_output, request=mock_request ) # type: ignore[arg-type] assert extracted_tool_calls.tools_called