Files
vllm-glm/tests/test_tool_response.py

446 lines
15 KiB
Python
Raw Permalink Normal View History

2026-04-09 04:28:22 +00:00
#!/usr/bin/env python3
"""
Test for tool call response handling in GLM-5.1.
Tests the multi-turn flow:
1. Send a prompt that triggers a tool call
2. Send back the tool result
3. Verify the model can see and use the tool response
This reproduces the issue where tool responses appear blank to the model.
"""
import os
import json
import httpx
from datetime import datetime
# Base URL of the vLLM OpenAI-compatible server under test.
API_BASE = os.environ.get("VLLM_API_BASE", "http://localhost:8000/v1")
# Bearer token sent in the Authorization header; "none" works for unauthenticated servers.
API_KEY = os.environ.get("VLLM_API_KEY", "none")
# Model name to request; defaults to the GLM FP8 checkpoint this test targets.
MODEL = os.environ.get("VLLM_MODEL", "zai-org/GLM-5.1-FP8")
def timestamp():
    """Return the current wall-clock time formatted as HH:MM:SS.mmm."""
    now = datetime.now()
    # %f gives microseconds; trim the last three digits to milliseconds.
    return now.strftime("%H:%M:%S.%f")[:-3]
def test_tool_call_response_flow(streaming: bool = True):
    """
    Test the full tool call -> response -> follow-up flow.

    This simulates:
      1. User asks for weather
      2. Model calls get_weather tool
      3. We send back the weather data
      4. Model should see and use that data

    Args:
        streaming: exercise the SSE streaming path when True, plain JSON
            request/response when False.

    Returns:
        dict with "success" (bool) and either "reason" (when no tool call
        was produced) or "issues"/"final_response" describing the outcome.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and state, e.g. 'New York, NY'"
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]
    # Initial request that should trigger a tool call
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Tokyo right now?"
        }
    ]
    mode = "STREAMING" if streaming else "NON-STREAMING"
    print(f"\n{'='*60}")
    print(f"TEST: Tool call response flow ({mode})")
    print(f"API: {API_BASE}")
    print(f"Model: {MODEL}")
    print(f"{'='*60}\n")
    # Same headers on every request; build once.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    with httpx.Client(timeout=120.0) as client:
        # Step 1: Send initial request, expect tool call
        print(f"[{timestamp()}] Step 1: Sending initial request...")
        if streaming:
            # Accumulate streamed tool-call deltas keyed by their "index"
            # field. The previous code read the index but ignored it, so
            # parallel tool calls would have been merged into one.
            calls_by_index = {}
            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers=headers,
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": True,
                    "max_tokens": 512
                }
            ) as response:
                print(f"[{timestamp()}] Response status: {response.status_code}")
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if not line.startswith("data: "):
                        continue
                    try:
                        chunk = json.loads(line[6:])
                    except json.JSONDecodeError as e:
                        print(f"[{timestamp()}] JSON error: {e}")
                        continue
                    if not chunk.get("choices"):
                        continue
                    delta = chunk["choices"][0].get("delta", {})
                    for tc in delta.get("tool_calls") or []:
                        idx = tc.get("index", 0)
                        entry = calls_by_index.setdefault(
                            idx, {"id": None, "name": None, "args": ""}
                        )
                        if tc.get("id"):
                            entry["id"] = tc["id"]
                        fn = tc.get("function", {})
                        if fn.get("name"):
                            entry["name"] = fn["name"]
                            print(f"[{timestamp()}] Tool call: {fn['name']}")
                        if fn.get("arguments"):
                            # Arguments stream as JSON fragments; concatenate.
                            entry["args"] += fn["arguments"]
                    if delta.get("content"):
                        print(f"[{timestamp()}] Content: {delta['content'][:100]}")
            # Rebuild OpenAI-shaped tool_calls, preserving index order.
            tool_calls = [
                {
                    "id": entry["id"] or f"call_{idx}",
                    "type": "function",
                    "function": {
                        "name": entry["name"],
                        "arguments": entry["args"]
                    }
                }
                for idx, entry in sorted(calls_by_index.items())
                if entry["name"]
            ]
        else:
            # Non-streaming: tool calls arrive fully formed in the message.
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers=headers,
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": False,
                    "max_tokens": 512
                }
            )
            result = response.json()
            print(f"[{timestamp()}] Response status: {response.status_code}")
            tool_calls = []
            if result.get("choices"):
                message = result["choices"][0].get("message", {})
                if message.get("tool_calls"):
                    tool_calls = message["tool_calls"]
                    for tc in tool_calls:
                        print(f"[{timestamp()}] Tool call: {tc['function']['name']}")
                        print(f"[{timestamp()}] Args: {tc['function']['arguments']}")
        # Check if we got a tool call
        if not tool_calls:
            print(f"\n[{timestamp()}] No tool call received - model didn't call the tool")
            return {"success": False, "reason": "no_tool_call"}
        # Step 2: Parse tool call and prepare response
        tc = tool_calls[0]
        tc_id = tc.get("id", "call_0")
        tc_name = tc["function"]["name"]
        tc_args = json.loads(tc["function"]["arguments"])
        print(f"\n[{timestamp()}] Step 2: Tool call received")
        print(f" Name: {tc_name}")
        print(f" Args: {tc_args}")
        # Simulate tool execution with a fixed, easily-recognizable payload;
        # the assertions below look for these exact values in the reply.
        tool_result = {
            "location": tc_args.get("location", "Unknown"),
            "temperature": "22°C",
            "condition": "Partly cloudy",
            "humidity": "65%",
            "wind": "15 km/h NE"
        }
        # Step 3: Send the tool response back
        messages.append({
            "role": "assistant",
            "tool_calls": tool_calls
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": json.dumps(tool_result)
        })
        print(f"\n[{timestamp()}] Step 3: Sending tool response...")
        print(f" Tool call ID: {tc_id}")
        print(f" Tool result: {json.dumps(tool_result, indent=2)}")
        # Step 4: Get the model's follow-up response
        if streaming:
            final_response = ""
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (streaming)...")
            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers=headers,
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": True,
                    "max_tokens": 512
                }
            ) as response:
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if not line.startswith("data: "):
                        continue
                    try:
                        chunk = json.loads(line[6:])
                    except json.JSONDecodeError:
                        continue
                    if chunk.get("choices"):
                        delta = chunk["choices"][0].get("delta", {})
                        if delta.get("content"):
                            content = delta["content"]
                            final_response += content
                            print(f"[{timestamp()}] Content: {content}", end="", flush=True)
            print()  # newline after streaming output
        else:
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (non-streaming)...")
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers=headers,
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": False,
                    "max_tokens": 512
                }
            )
            result = response.json()
            final_response = ""
            if result.get("choices"):
                final_response = result["choices"][0].get("message", {}).get("content", "")
        print(f"\n[{timestamp()}] Final response:\n{final_response}")
        # Check if the model used the tool data
        success = True
        issues = []
        # "22°C" contains "22", so one substring test covers both forms.
        if "22" not in final_response:
            issues.append("Temperature (22°C) not mentioned in response")
            success = False
        # "partly cloudy" contains "cloudy", so one check suffices.
        if "cloudy" not in final_response.lower():
            issues.append("Condition (Partly cloudy) not mentioned in response")
            success = False
        # Check for signs the model didn't see the data
        blank_indicators = [
            "i don't have",
            "i cannot access",
            "i'm unable to",
            "i am unable to",
            "don't have access",
            "don't have real-time",
            "cannot provide real-time"
        ]
        lowered = final_response.lower()
        for indicator in blank_indicators:
            if indicator in lowered:
                issues.append(f"Model seems unaware of tool result (found: '{indicator}')")
                success = False
                break
        print(f"\n{'='*60}")
        if success:
            print("✓ PASS: Model correctly used tool response data")
        else:
            print("✗ FAIL: Model did not use tool response correctly")
            for issue in issues:
                print(f" - {issue}")
        print(f"{'='*60}\n")
        return {
            "success": success,
            "issues": issues,
            "final_response": final_response
        }
def test_tool_response_with_debug_info():
    """
    Test with detailed logging to capture exactly what the model sees.

    Non-streaming only: requests a get_time tool call, feeds back a canned
    time string as the tool result, dumps the full message array and raw
    response JSON, and checks whether the model echoes the time.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_time",
                "description": "Get the current time",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }
    ]
    print(f"\n{'='*60}")
    print(f"TEST: Tool response with debug info (non-streaming)")
    print(f"{'='*60}\n")
    messages = [
        {"role": "user", "content": "What time is it?"}
    ]
    # Both requests use identical headers; build once.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    with httpx.Client(timeout=120.0) as client:
        # Get tool call
        print(f"[{timestamp()}] Sending initial request...")
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers=headers,
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "tool_choice": "auto",
                "stream": False,
                "max_tokens": 256
            }
        )
        result = response.json()
        if not result.get("choices") or not result["choices"][0].get("message", {}).get("tool_calls"):
            print("No tool call - skipping test")
            return
        tool_call = result["choices"][0]["message"]["tool_calls"][0]
        tc_id = tool_call["id"]
        print(f"[{timestamp()}] Tool call: {tool_call['function']['name']}")
        print(f"[{timestamp()}] Tool call ID: {tc_id}")
        # Add tool response
        messages.append({
            "role": "assistant",
            "tool_calls": [tool_call]
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": "The current time is 3:45 PM on Thursday, April 9, 2026."
        })
        # Debug: print the full messages array we're about to send
        print(f"\n[{timestamp()}] Sending follow-up with these messages:")
        print(json.dumps(messages, indent=2))
        # Get follow-up
        response2 = client.post(
            f"{API_BASE}/chat/completions",
            headers=headers,
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 256
            }
        )
        result2 = response2.json()
        print(f"\n[{timestamp()}] Full response:")
        print(json.dumps(result2, indent=2))
        if result2.get("choices"):
            content = result2["choices"][0].get("message", {}).get("content", "")
            print(f"\n[{timestamp()}] Model response content: {content}")
            # Check if time is mentioned. "3:45 PM" contains "3:45", so the
            # original second disjunct was redundant and is dropped.
            if "3:45" in content:
                print("\n✓ Model used the tool response (time mentioned)")
            else:
                print("\n✗ Model may not have seen the tool response (time not mentioned)")
def main():
    """Run every tool-response test: non-streaming, streaming, then debug."""
    banner = "=" * 60
    print("\n" + banner)
    print("GLM-5.1 Tool Call Response Tests")
    print(banner)
    # Non-streaming first — it is the simpler path to debug.
    print("\n--- Test 1: Non-streaming tool response flow ---")
    test_tool_call_response_flow(streaming=False)
    # Then the SSE streaming variant of the same flow.
    print("\n--- Test 2: Streaming tool response flow ---")
    test_tool_call_response_flow(streaming=True)
    # Finally the verbose debug run that dumps full payloads.
    print("\n--- Test 3: Debug info test ---")
    test_tool_response_with_debug_info()
    print("\nAll tests complete.")


if __name__ == "__main__":
    main()