[Tool Parser][2/3] Use self.tools instead of request.tools in tool parsers (#38189)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -27,14 +27,8 @@ def glm4_moe_tokenizer():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def glm4_moe_tool_parser(glm4_moe_tokenizer):
|
||||
return Glm4MoeModelToolParser(glm4_moe_tokenizer)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_request() -> ChatCompletionRequest:
|
||||
request = Mock(spec=ChatCompletionRequest)
|
||||
request.tools = [ # GLM45 parser needs this attribute to enable tool parsing.
|
||||
def sample_tools():
|
||||
return [
|
||||
ChatCompletionToolsParam(
|
||||
function=FunctionDefinition(
|
||||
name="get_weather",
|
||||
@@ -42,6 +36,17 @@ def mock_request() -> ChatCompletionRequest:
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools):
|
||||
return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_request(sample_tools) -> ChatCompletionRequest:
|
||||
request = Mock(spec=ChatCompletionRequest)
|
||||
request.tools = sample_tools
|
||||
return request
|
||||
|
||||
|
||||
@@ -671,14 +676,13 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
|
||||
assert '"' in parsed["message"] or "world" in parsed["message"]
|
||||
|
||||
|
||||
def test_streaming_long_content_incremental(glm4_moe_tool_parser):
|
||||
def test_streaming_long_content_incremental(glm4_moe_tokenizer):
|
||||
"""Test incremental streaming of long content (Issue #32829).
|
||||
|
||||
This is the core fix: for long string values like code (4000+ chars),
|
||||
the parser should stream incrementally rather than buffering until
|
||||
complete. This test verifies we get many fragments, not just 1-3.
|
||||
"""
|
||||
_reset_streaming_state(glm4_moe_tool_parser)
|
||||
|
||||
# Bubble sort example from Issue #32829 - realistic long content
|
||||
bubble_sort_code = '''#!/usr/bin/env python3
|
||||
@@ -705,27 +709,28 @@ if __name__ == "__main__":
|
||||
sorted_arr = bubble_sort(test_arr.copy())
|
||||
print(f"Sorted: {sorted_arr}")'''
|
||||
|
||||
# Create a request with tool schema to enable string type detection
|
||||
# Create tools with schema to enable string type detection
|
||||
# This is required for incremental streaming of string values
|
||||
tools = [
|
||||
ChatCompletionToolsParam(
|
||||
function=FunctionDefinition(
|
||||
name="write_to_file",
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"file_path": {"type": "string"},
|
||||
"content": {"type": "string"},
|
||||
},
|
||||
},
|
||||
),
|
||||
),
|
||||
]
|
||||
glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
|
||||
request = ChatCompletionRequest(
|
||||
model=MODEL,
|
||||
messages=[],
|
||||
tools=[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "write_to_file",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"file_path": {"type": "string"},
|
||||
"content": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
],
|
||||
) # type: ignore
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
# Simulate token-based streaming (special tags as single tokens)
|
||||
chunks = [
|
||||
|
||||
Reference in New Issue
Block a user