SmolLM3-3B tool call fix: template bugs found and patched
This commit is contained in:
445
test_tool_response.py
Normal file
445
test_tool_response.py
Normal file
@@ -0,0 +1,445 @@
|
||||
#!/usr/bin/env python3
"""
Test for tool call response handling in SmolLM3-3B.

Tests the multi-turn flow:
1. Send a prompt that triggers a tool call
2. Send back the tool result
3. Verify the model can see and use the tool response

This reproduces the issue where tool responses appear blank to the model.
"""

import os
import json
import httpx
from datetime import datetime


# Target server and model are overridable via environment so the same
# script can be pointed at any OpenAI-compatible endpoint.
API_BASE = os.environ.get("VLLM_API_BASE", "http://95.179.247.150/v1")
API_KEY = os.environ.get("VLLM_API_KEY", "none")
MODEL = os.environ.get("VLLM_MODEL", "HuggingFaceTB/SmolLM3-3B")
|
||||
|
||||
|
||||
def timestamp():
    """Return the current wall-clock time formatted as HH:MM:SS.mmm."""
    now = datetime.now()
    # strftime's %f gives microseconds; drop the last three digits for ms.
    return now.strftime("%H:%M:%S.%f")[:-3]
|
||||
|
||||
|
||||
def test_tool_call_response_flow(streaming: bool = True):
    """
    Test the full tool call -> response -> follow-up flow.

    This simulates:
    1. User asks for weather
    2. Model calls get_weather tool
    3. We send back the weather data
    4. Model should see and use that data

    Args:
        streaming: When True, both requests use SSE streaming; otherwise
            plain JSON request/response bodies are used.

    Returns:
        dict with "success" (bool) plus "issues" and "final_response" on a
        completed run, or "reason" when the model produced no tool call.
    """

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and state, e.g. 'New York, NY'"
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]

    # Initial request that should trigger a tool call
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Tokyo right now?"
        }
    ]

    mode = "STREAMING" if streaming else "NON-STREAMING"
    print(f"\n{'='*60}")
    print(f"TEST: Tool call response flow ({mode})")
    print(f"API: {API_BASE}")
    print(f"Model: {MODEL}")
    print(f"{'='*60}\n")

    with httpx.Client(timeout=120.0) as client:
        # Step 1: Send initial request, expect tool call
        print(f"[{timestamp()}] Step 1: Sending initial request...")

        if streaming:
            tool_calls = []
            tool_call_id = None
            tool_call_name = None
            accumulated_args = ""

            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": True,
                    "max_tokens": 512
                }
            ) as response:
                print(f"[{timestamp()}] Response status: {response.status_code}")

                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue

                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})

                                if delta.get("tool_calls"):
                                    # NOTE(review): only a single tool call is
                                    # accumulated here; deltas for multiple
                                    # tool-call indices would be merged together.
                                    for tc in delta["tool_calls"]:
                                        if tc.get("id"):
                                            tool_call_id = tc["id"]

                                        if tc.get("function", {}).get("name"):
                                            tool_call_name = tc["function"]["name"]
                                            print(f"[{timestamp()}] Tool call: {tool_call_name}")

                                        if tc.get("function", {}).get("arguments"):
                                            accumulated_args += tc["function"]["arguments"]

                                if delta.get("content"):
                                    print(f"[{timestamp()}] Content: {delta['content'][:100]}")

                        except json.JSONDecodeError as e:
                            print(f"[{timestamp()}] JSON error: {e}")

            if tool_call_name:
                tool_calls.append({
                    "id": tool_call_id or "call_0",
                    "type": "function",
                    "function": {
                        "name": tool_call_name,
                        "arguments": accumulated_args
                    }
                })
        else:
            # Non-streaming
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": False,
                    "max_tokens": 512
                }
            )

            result = response.json()
            print(f"[{timestamp()}] Response status: {response.status_code}")

            tool_calls = []
            if result.get("choices"):
                message = result["choices"][0].get("message", {})
                if message.get("tool_calls"):
                    tool_calls = message["tool_calls"]
                    for tc in tool_calls:
                        print(f"[{timestamp()}] Tool call: {tc['function']['name']}")
                        print(f"[{timestamp()}] Args: {tc['function']['arguments']}")

        # Check if we got a tool call
        if not tool_calls:
            print(f"\n[{timestamp()}] No tool call received - model didn't call the tool")
            return {"success": False, "reason": "no_tool_call"}

        # Step 2: Parse tool call and prepare response
        tc = tool_calls[0]
        tc_id = tc.get("id", "call_0")
        tc_name = tc["function"]["name"]
        # Some servers emit an empty arguments string; fall back to an empty
        # JSON object so json.loads doesn't raise on "".
        tc_args = json.loads(tc["function"]["arguments"] or "{}")

        print(f"\n[{timestamp()}] Step 2: Tool call received")
        print(f"  Name: {tc_name}")
        print(f"  Args: {tc_args}")

        # Simulate tool execution
        tool_result = {
            "location": tc_args.get("location", "Unknown"),
            "temperature": "22°C",
            "condition": "Partly cloudy",
            "humidity": "65%",
            "wind": "15 km/h NE"
        }

        # Step 3: Send the tool response back
        messages.append({
            "role": "assistant",
            "tool_calls": tool_calls
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": json.dumps(tool_result)
        })

        print(f"\n[{timestamp()}] Step 3: Sending tool response...")
        print(f"  Tool call ID: {tc_id}")
        print(f"  Tool result: {json.dumps(tool_result, indent=2)}")

        # Step 4: Get the model's follow-up response
        if streaming:
            final_response = ""
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (streaming)...")

            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": True,
                    "max_tokens": 512
                }
            ) as response:
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue

                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                if delta.get("content"):
                                    content = delta["content"]
                                    final_response += content
                                    print(f"[{timestamp()}] Content: {content}", end="", flush=True)
                        except json.JSONDecodeError:
                            pass

            print()  # newline after streaming output
        else:
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (non-streaming)...")

            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": False,
                    "max_tokens": 512
                }
            )

            result = response.json()
            final_response = ""
            if result.get("choices"):
                # content can be JSON null (e.g. another tool call); coerce to
                # "" so the substring checks below never see None.
                final_response = result["choices"][0].get("message", {}).get("content") or ""

    print(f"\n[{timestamp()}] Final response:\n{final_response}")

    # Check if the model used the tool data
    success = True
    issues = []

    # The response should mention the weather data; "22" also matches "22°C".
    if "22" not in final_response:
        issues.append("Temperature (22°C) not mentioned in response")
        success = False

    # "cloudy" also matches "partly cloudy".
    if "cloudy" not in final_response.lower():
        issues.append("Condition (Partly cloudy) not mentioned in response")
        success = False

    # Check for signs the model didn't see the data
    blank_indicators = [
        "i don't have",
        "i cannot access",
        "i'm unable to",
        "i am unable to",
        "don't have access",
        "don't have real-time",
        "cannot provide real-time"
    ]

    for indicator in blank_indicators:
        if indicator in final_response.lower():
            issues.append(f"Model seems unaware of tool result (found: '{indicator}')")
            success = False
            break

    print(f"\n{'='*60}")
    if success:
        print("✓ PASS: Model correctly used tool response data")
    else:
        print("✗ FAIL: Model did not use tool response correctly")
        for issue in issues:
            print(f"  - {issue}")
    print(f"{'='*60}\n")

    return {
        "success": success,
        "issues": issues,
        "final_response": final_response
    }
|
||||
|
||||
|
||||
def test_tool_response_with_debug_info():
    """
    Test with detailed logging to capture exactly what the model sees.

    Uses a zero-argument get_time tool, feeds back a fixed time string, and
    checks whether the model's follow-up echoes that time. Prints the full
    messages array and raw response JSON for debugging.
    """

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_time",
                "description": "Get the current time",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }
    ]

    print(f"\n{'='*60}")
    print(f"TEST: Tool response with debug info (non-streaming)")
    print(f"{'='*60}\n")

    messages = [
        {"role": "user", "content": "What time is it?"}
    ]

    with httpx.Client(timeout=120.0) as client:
        # Get tool call
        print(f"[{timestamp()}] Sending initial request...")
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "tool_choice": "auto",
                "stream": False,
                "max_tokens": 256
            }
        )

        result = response.json()

        if not result.get("choices") or not result["choices"][0].get("message", {}).get("tool_calls"):
            print("No tool call - skipping test")
            return

        tool_call = result["choices"][0]["message"]["tool_calls"][0]
        tc_id = tool_call["id"]

        print(f"[{timestamp()}] Tool call: {tool_call['function']['name']}")
        print(f"[{timestamp()}] Tool call ID: {tc_id}")

        # Add tool response
        messages.append({
            "role": "assistant",
            "tool_calls": [tool_call]
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": "The current time is 3:45 PM on Thursday, April 9, 2026."
        })

        # Debug: print the full messages array we're about to send
        print(f"\n[{timestamp()}] Sending follow-up with these messages:")
        print(json.dumps(messages, indent=2))

        # Get follow-up
        response2 = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 256
            }
        )

        result2 = response2.json()
        print(f"\n[{timestamp()}] Full response:")
        print(json.dumps(result2, indent=2))

        if result2.get("choices"):
            # content can be JSON null (e.g. if the model emitted another
            # tool call); coerce to "" so the substring check never sees None.
            content = result2["choices"][0].get("message", {}).get("content") or ""

            print(f"\n[{timestamp()}] Model response content: {content}")

            # Check if time is mentioned; "3:45" also matches "3:45 PM".
            if "3:45" in content:
                print("\n✓ Model used the tool response (time mentioned)")
            else:
                print("\n✗ Model may not have seen the tool response (time not mentioned)")
|
||||
|
||||
|
||||
def main():
    """Run the non-streaming, streaming, and debug tool-response tests."""
    print("\n" + "="*60)
    # Derive the banner from the configured model; the previous hard-coded
    # "GLM-5.1" header did not match the SmolLM3-3B model under test.
    print(f"{MODEL} Tool Call Response Tests")
    print("="*60)

    # Test non-streaming first (simpler to debug)
    print("\n--- Test 1: Non-streaming tool response flow ---")
    test_tool_call_response_flow(streaming=False)

    # Test streaming
    print("\n--- Test 2: Streaming tool response flow ---")
    test_tool_call_response_flow(streaming=True)

    # Debug test
    print("\n--- Test 3: Debug info test ---")
    test_tool_response_with_debug_info()

    print("\nAll tests complete.")
|
||||
|
||||
|
||||
# Script entry point: run the full tool-call response test suite.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user