[Tool Parser][2/3] Use self.tools instead of request.tools in tool parsers (#38189)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-31 01:41:36 -04:00
committed by GitHub
parent 44eef0ca1e
commit d53cb9cb8e
16 changed files with 113 additions and 105 deletions

View File

@@ -27,14 +27,8 @@ def glm4_moe_tokenizer():
@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer):
    """Build a GLM-4-MoE tool parser from the shared tokenizer fixture.

    NOTE(review): this is the pre-change fixture shown in the diff — it
    constructs the parser without tool definitions (the parser then relied
    on request.tools at call time).
    """
    return Glm4MoeModelToolParser(glm4_moe_tokenizer)
@pytest.fixture
def mock_request() -> ChatCompletionRequest:
request = Mock(spec=ChatCompletionRequest)
request.tools = [ # GLM45 parser needs this attribute to enable tool parsing.
def sample_tools():
return [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="get_weather",
@@ -42,6 +36,17 @@ def mock_request() -> ChatCompletionRequest:
),
),
]
@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools):
    """Build a GLM-4-MoE tool parser with tool schemas supplied up front.

    Per this commit, the parser reads ``self.tools`` (passed at
    construction) instead of ``request.tools``, so the fixture now wires
    the ``sample_tools`` fixture into the constructor.
    """
    return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools)
@pytest.fixture
def mock_request(sample_tools) -> ChatCompletionRequest:
    """Mock ChatCompletionRequest carrying the sample tool definitions.

    ``spec=ChatCompletionRequest`` makes the mock reject attribute access
    that the real request type does not expose; ``tools`` is set explicitly
    because tests (and, pre-change, the parser) read it.
    """
    request = Mock(spec=ChatCompletionRequest)
    request.tools = sample_tools
    return request
@@ -671,14 +676,13 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
assert '"' in parsed["message"] or "world" in parsed["message"]
def test_streaming_long_content_incremental(glm4_moe_tool_parser):
def test_streaming_long_content_incremental(glm4_moe_tokenizer):
"""Test incremental streaming of long content (Issue #32829).
This is the core fix: for long string values like code (4000+ chars),
the parser should stream incrementally rather than buffering until
complete. This test verifies we get many fragments, not just 1-3.
"""
_reset_streaming_state(glm4_moe_tool_parser)
# Bubble sort example from Issue #32829 - realistic long content
bubble_sort_code = '''#!/usr/bin/env python3
@@ -705,27 +709,28 @@ if __name__ == "__main__":
sorted_arr = bubble_sort(test_arr.copy())
print(f"Sorted: {sorted_arr}")'''
# Create a request with tool schema to enable string type detection
# Create tools with schema to enable string type detection
# This is required for incremental streaming of string values
tools = [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="write_to_file",
parameters={
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
),
),
]
glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
request = ChatCompletionRequest(
model=MODEL,
messages=[],
tools=[
{
"type": "function",
"function": {
"name": "write_to_file",
"parameters": {
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
},
}
],
) # type: ignore
tools=tools,
)
# Simulate token-based streaming (special tags as single tokens)
chunks = [