#!/usr/bin/env python3
"""
Test for tool call response handling in GLM-5.1.

Tests the multi-turn flow:
1. Send a prompt that triggers a tool call
2. Send back the tool result
3. Verify the model can see and use the tool response

This reproduces the issue where tool responses appear blank to the model.
"""

import os
import json

import httpx

from datetime import datetime

# Endpoint/model configuration, overridable via environment for other deployments.
API_BASE = os.environ.get("VLLM_API_BASE", "http://95.179.247.150/v1")
API_KEY = os.environ.get("VLLM_API_KEY", "none")
MODEL = os.environ.get("VLLM_MODEL", "HuggingFaceTB/SmolLM3-3B")


def timestamp() -> str:
    """Return the current wall-clock time as HH:MM:SS.mmm for log prefixes."""
    return datetime.now().strftime("%H:%M:%S.%f")[:-3]


def test_tool_call_response_flow(streaming: bool = True) -> dict:
    """
    Test the full tool call -> response -> follow-up flow.

    This simulates:
    1. User asks for weather
    2. Model calls get_weather tool
    3. We send back the weather data
    4. Model should see and use that data

    Args:
        streaming: Use SSE streaming for both requests when True,
            plain POSTs when False.

    Returns:
        A result dict: {"success": bool, ...} with either a "reason"
        (no tool call was produced) or "issues"/"final_response".
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and state, e.g. 'New York, NY'"
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]

    # Initial request that should trigger a tool call
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Tokyo right now?"
        }
    ]

    mode = "STREAMING" if streaming else "NON-STREAMING"
    print(f"\n{'='*60}")
    print(f"TEST: Tool call response flow ({mode})")
    print(f"API: {API_BASE}")
    print(f"Model: {MODEL}")
    print(f"{'='*60}\n")

    with httpx.Client(timeout=120.0) as client:
        # Step 1: Send initial request, expect tool call
        print(f"[{timestamp()}] Step 1: Sending initial request...")

        if streaming:
            tool_calls = []
            tool_call_id = None
            tool_call_name = None
            accumulated_args = ""

            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": True,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            ) as response:
                print(f"[{timestamp()}] Response status: {response.status_code}")
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                if delta.get("tool_calls"):
                                    # Accumulate the (fragmented) tool call from
                                    # deltas. NOTE(review): only a single tool
                                    # call is reconstructed; the "index" field is
                                    # ignored, so parallel tool calls would be
                                    # merged into one argument string.
                                    for tc in delta["tool_calls"]:
                                        if tc.get("id"):
                                            tool_call_id = tc["id"]
                                        if tc.get("function", {}).get("name"):
                                            tool_call_name = tc["function"]["name"]
                                            print(f"[{timestamp()}] Tool call: {tool_call_name}")
                                        if tc.get("function", {}).get("arguments"):
                                            accumulated_args += tc["function"]["arguments"]
                                if delta.get("content"):
                                    print(f"[{timestamp()}] Content: {delta['content'][:100]}")
                        except json.JSONDecodeError as e:
                            print(f"[{timestamp()}] JSON error: {e}")

            if tool_call_name:
                tool_calls.append({
                    "id": tool_call_id or "call_0",
                    "type": "function",
                    "function": {
                        "name": tool_call_name,
                        "arguments": accumulated_args
                    }
                })
        else:
            # Non-streaming
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": False,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            )
            result = response.json()
            print(f"[{timestamp()}] Response status: {response.status_code}")

            tool_calls = []
            if result.get("choices"):
                message = result["choices"][0].get("message", {})
                if message.get("tool_calls"):
                    tool_calls = message["tool_calls"]
                    for tc in tool_calls:
                        print(f"[{timestamp()}] Tool call: {tc['function']['name']}")
                        print(f"[{timestamp()}] Args: {tc['function']['arguments']}")

        # Check if we got a tool call
        if not tool_calls:
            print(f"\n[{timestamp()}] No tool call received - model didn't call the tool")
            return {"success": False, "reason": "no_tool_call"}

        # Step 2: Parse tool call and prepare response
        tc = tool_calls[0]
        tc_id = tc.get("id", "call_0")
        tc_name = tc["function"]["name"]
        # Guard against empty/blank arguments (the exact symptom this script
        # probes for) so the parse degrades to {} instead of raising.
        tc_args = json.loads(tc["function"]["arguments"] or "{}")

        print(f"\n[{timestamp()}] Step 2: Tool call received")
        print(f"  Name: {tc_name}")
        print(f"  Args: {tc_args}")

        # Simulate tool execution
        tool_result = {
            "location": tc_args.get("location", "Unknown"),
            "temperature": "22°C",
            "condition": "Partly cloudy",
            "humidity": "65%",
            "wind": "15 km/h NE"
        }

        # Step 3: Send the tool response back.
        # NOTE(review): the assistant turn carries no "content" key; some
        # chat templates expect an explicit null/empty content — confirm
        # against the server if tool responses still appear blank.
        messages.append({
            "role": "assistant",
            "tool_calls": tool_calls
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": json.dumps(tool_result)
        })

        print(f"\n[{timestamp()}] Step 3: Sending tool response...")
        print(f"  Tool call ID: {tc_id}")
        print(f"  Tool result: {json.dumps(tool_result, indent=2)}")

        # Step 4: Get the model's follow-up response
        if streaming:
            final_response = ""
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (streaming)...")
            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": True,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            ) as response:
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                if delta.get("content"):
                                    content = delta["content"]
                                    final_response += content
                                    print(f"[{timestamp()}] Content: {content}", end="", flush=True)
                        except json.JSONDecodeError:
                            pass
            print()  # newline after streaming output
        else:
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (non-streaming)...")
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": False,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            )
            result = response.json()
            final_response = ""
            if result.get("choices"):
                # "or ''" guards against an explicit null content in the JSON,
                # which .get(..., "") does NOT cover and which would make the
                # substring checks below raise TypeError.
                final_response = result["choices"][0].get("message", {}).get("content", "") or ""

        print(f"\n[{timestamp()}] Final response:\n{final_response}")

        # Check if the model used the tool data
        success = True
        issues = []

        # The response should mention the weather data.
        # ("22" is a substring of "22°C", so one check suffices.)
        if "22" not in final_response:
            issues.append("Temperature (22°C) not mentioned in response")
            success = False

        # ("cloudy" is a substring of "partly cloudy", so one check suffices.)
        if "cloudy" not in final_response.lower():
            issues.append("Condition (Partly cloudy) not mentioned in response")
            success = False

        # Check for signs the model didn't see the data
        blank_indicators = [
            "i don't have",
            "i cannot access",
            "i'm unable to",
            "i am unable to",
            "don't have access",
            "don't have real-time",
            "cannot provide real-time"
        ]
        for indicator in blank_indicators:
            if indicator in final_response.lower():
                issues.append(f"Model seems unaware of tool result (found: '{indicator}')")
                success = False
                break

        print(f"\n{'='*60}")
        if success:
            print("✓ PASS: Model correctly used tool response data")
        else:
            print("✗ FAIL: Model did not use tool response correctly")
            for issue in issues:
                print(f"  - {issue}")
        print(f"{'='*60}\n")

        return {
            "success": success,
            "issues": issues,
            "final_response": final_response
        }


def test_tool_response_with_debug_info() -> None:
    """
    Test with detailed logging to capture exactly what the model sees.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_time",
                "description": "Get the current time",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }
    ]

    print(f"\n{'='*60}")
    print(f"TEST: Tool response with debug info (non-streaming)")
    print(f"{'='*60}\n")

    messages = [
        {"role": "user", "content": "What time is it?"}
    ]

    with httpx.Client(timeout=120.0) as client:
        # Get tool call
        print(f"[{timestamp()}] Sending initial request...")
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "tool_choice": "auto",
                "stream": False,
                "max_tokens": 256,
                "chat_template_kwargs": {"enable_thinking": False},
                "logprobs": True,
                "top_logprobs": 5
            }
        )
        result = response.json()

        if not result.get("choices") or not result["choices"][0].get("message", {}).get("tool_calls"):
            print("No tool call - skipping test")
            return

        tool_call = result["choices"][0]["message"]["tool_calls"][0]
        tc_id = tool_call["id"]
        print(f"[{timestamp()}] Tool call: {tool_call['function']['name']}")
        print(f"[{timestamp()}] Tool call ID: {tc_id}")

        # Add tool response
        messages.append({
            "role": "assistant",
            "tool_calls": [tool_call]
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": "The current time is 3:45 PM on Thursday, April 9, 2026."
        })

        # Debug: print the full messages array we're about to send
        print(f"\n[{timestamp()}] Sending follow-up with these messages:")
        print(json.dumps(messages, indent=2))

        # Get follow-up
        response2 = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 256,
                "chat_template_kwargs": {"enable_thinking": False},
                "logprobs": True,
                "top_logprobs": 5
            }
        )
        result2 = response2.json()

        print(f"\n[{timestamp()}] Full response:")
        print(json.dumps(result2, indent=2))

        if result2.get("choices"):
            # "or ''" guards an explicit null content (would crash the "in" check).
            content = result2["choices"][0].get("message", {}).get("content", "") or ""
            print(f"\n[{timestamp()}] Model response content: {content}")

            # Check if time is mentioned ("3:45" already covers "3:45 PM")
            if "3:45" in content:
                print("\n✓ Model used the tool response (time mentioned)")
            else:
                print("\n✗ Model may not have seen the tool response (time not mentioned)")


def main() -> None:
    """Run the three tool-response tests in order, simplest first."""
    print("\n" + "="*60)
    print("GLM-5.1 Tool Call Response Tests")
    print("="*60)

    # Test non-streaming first (simpler to debug)
    print("\n--- Test 1: Non-streaming tool response flow ---")
    test_tool_call_response_flow(streaming=False)

    # Test streaming
    print("\n--- Test 2: Streaming tool response flow ---")
    test_tool_call_response_flow(streaming=True)

    # Debug test
    print("\n--- Test 3: Debug info test ---")
    test_tool_response_with_debug_info()

    print("\nAll tests complete.")


if __name__ == "__main__":
    main()