# Source: vllm-glm/tests/test_tool_diagnosis.py
# (235 lines, 6.8 KiB, Python — snapshot retrieved 2026-04-09 04:28:22 +00:00)
#!/usr/bin/env python3
"""
Focused test to diagnose GLM-5.1 tool response issue.
The issue: Model sees tool response as blank.
"""
import json
import os

import httpx

API_BASE = "https://api.vultrinference.com/v1"
# SECURITY NOTE(review): a live-looking API key was hard-coded here. Prefer the
# VULTR_API_KEY environment variable; the literal remains only as a fallback so
# existing runs keep working — rotate and remove it before sharing this file.
API_KEY = os.environ.get("VULTR_API_KEY", "26DN7PNUB3YRBEPCDNMXKKD6ZODMETRSMOZQ")
MODEL = "zai-org/GLM-5.1-FP8"
def test_simple_tool_response():
    """
    Minimal test: send a completed tool-call exchange and check whether the
    model can use the tool response.

    Replays user -> assistant(tool_calls) -> tool messages, then inspects the
    reply: PASS if it references the tool's value ("42"), FAIL otherwise.
    Prints the full request and response for manual inspection.
    """
    # Simulate a conversation where a tool was called. The tool message must
    # carry the same tool_call_id the assistant emitted so the server can
    # pair the result with the call.
    messages = [
        {"role": "user", "content": "Call the test function"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "test_func", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "SUCCESS: The function returned value 42"
        }
    ]
    # Re-send the tool schema so the server can render the tool-call history
    # into its chat template.
    tools = [{
        "type": "function",
        "function": {
            "name": "test_func",
            "description": "A test function",
            "parameters": {"type": "object", "properties": {}}
        }
    }]
    print("=" * 60)
    print("Request messages:")
    print(json.dumps(messages, indent=2))
    print("=" * 60)
    with httpx.Client(timeout=60.0) as client:
        # Non-streaming to get the full response in one JSON document
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 256
            }
        )
        result = response.json()
        print("\nFull response:")
        print(json.dumps(result, indent=2))
        if result.get("choices"):
            # BUGFIX: "content" can be present but JSON null (e.g. the model
            # answers with another tool call), in which case .get() returns
            # None and the substring checks below raise TypeError. Coerce to
            # "" so the diagnosis always completes.
            content = result["choices"][0].get("message", {}).get("content") or ""
            print("\n" + "=" * 60)
            print("Model response content:")
            print(content)
            print("=" * 60)
            # Check if the tool result is referenced
            if "42" in content:
                print("\n✓ PASS: Model referenced the tool result (42)")
            else:
                print("\n✗ FAIL: Model did NOT reference the tool result (42)")
            # Check for signs the model didn't see the result
            if "don't have" in content.lower() or "cannot access" in content.lower():
                print("✗ Model indicates it cannot see tool result")
def test_without_tools_param():
    """
    Test what happens if we don't pass tools in the follow-up request.
    Some APIs need tools to be passed on every request.
    """
    # Same completed tool-call exchange as test_simple_tool_response, but the
    # follow-up request deliberately omits the "tools" parameter.
    messages = [
        {"role": "user", "content": "Call the test function"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "test_func", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "SUCCESS: The function returned value 42"
        }
    ]
    print("\n" + "=" * 60)
    print("Test WITHOUT tools param in follow-up")
    print("=" * 60)
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                # No tools param
                "stream": False,
                "max_tokens": 256
            }
        )
        result = response.json()
        if result.get("choices"):
            # BUGFIX: "content" may be JSON null (None after parsing); coerce
            # to "" so slicing and the substring check below can't raise
            # TypeError.
            content = result["choices"][0].get("message", {}).get("content") or ""
            print("Model response:", content[:200])
            if "42" in content:
                print("✓ Model referenced the tool result")
def test_different_content_formats():
    """
    Test if the issue is with how content is formatted.

    Sends the same tool-call exchange twice: once with the tool result as a
    plain string, once as an OpenAI-style content-part array, and reports
    whether each reply references the tool's answer.
    """
    # Test 1: String content (standard)
    messages_string = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 4"
        }
    ]
    # Test 2: Content as array (OpenAI format)
    messages_array = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": [{"type": "text", "text": "The answer is 4"}]
        }
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "calc",
            "description": "Calculator",
            "parameters": {"type": "object", "properties": {}}
        }
    }]
    print("\n" + "=" * 60)
    print("Test: String content vs Array content")
    print("=" * 60)
    with httpx.Client(timeout=60.0) as client:
        for name, msgs in [("String content", messages_string), ("Array content", messages_array)]:
            print(f"\n--- {name} ---")
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": msgs,
                    "tools": tools,
                    "stream": False,
                    "max_tokens": 128
                }
            )
            result = response.json()
            if result.get("choices"):
                # BUGFIX: "content" may be JSON null (None after parsing);
                # coerce to "" so slicing and the substring checks below
                # can't raise TypeError mid-loop.
                # NOTE(review): "4" is a weak marker — it matches any digit-4
                # in the reply, not only the tool result.
                content = result["choices"][0].get("message", {}).get("content") or ""
                print(f"Response: {content[:150]}")
                if "4" in content:
                    print("✓ Referenced tool result")
                else:
                    print("✗ Did NOT reference tool result")
if __name__ == "__main__":
    # Run every diagnostic in order; each one prints its own report.
    print("GLM-5.1 Tool Response Diagnosis")
    print("=" * 60)
    diagnostics = (
        test_simple_tool_response,
        test_without_tools_param,
        test_different_content_formats,
    )
    for run_diagnostic in diagnostics:
        run_diagnostic()