# vllm-glm/tests/test_tool_debug.py

#!/usr/bin/env python3
"""
Debug test to see what prompt the model actually receives.
"""

import json
import os

import httpx

# Base URL that every chat-completions request in this file is sent to.
API_BASE = "https://api.vultrinference.com/v1"
# The bearer token is read from the VULTR_API_KEY environment variable so no
# credential lives in source control. It falls back to an empty string, in
# which case the server is expected to reject the request.
# NOTE(review): a key was previously committed on this line; treat it as
# compromised and rotate it.
API_KEY = os.environ.get("VULTR_API_KEY", "")
# Model identifier sent in the "model" field of each request.
MODEL = "zai-org/GLM-5.1-FP8"


def test_with_echo():
    """
    Send a tool-call conversation with logprobs and ``echo`` enabled.

    The conversation is user -> assistant tool call -> tool result, and the
    matching tool definition is included in the request. The decoded JSON
    response is printed in full so it can be inspected by hand.
    """

    pending_call = {
        "id": "call_123",
        "type": "function",
        "function": {"name": "test_func", "arguments": "{}"},
    }
    conversation = [
        {"role": "user", "content": "Call the test function"},
        {"role": "assistant", "tool_calls": [pending_call]},
        {"role": "tool", "tool_call_id": "call_123", "content": "VALUE_42"},
    ]

    tool_specs = [
        {
            "type": "function",
            "function": {
                "name": "test_func",
                "description": "A test function",
                "parameters": {"type": "object", "properties": {}},
            },
        }
    ]

    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    # logprobs/echo are requested in the hope that the reply exposes the
    # prompt; whether the server honours "echo" is not verified here.
    payload = {
        "model": MODEL,
        "messages": conversation,
        "tools": tool_specs,
        "stream": False,
        "max_tokens": 100,
        "logprobs": True,
        "top_logprobs": 1,
        "echo": True,
    }

    with httpx.Client(timeout=60.0) as http:
        reply = http.post(
            f"{API_BASE}/chat/completions",
            headers=request_headers,
            json=payload,
        )
        decoded = reply.json()

        print("Full response:")
        print(json.dumps(decoded, indent=2, ensure_ascii=False))


def test_tool_only_message():
    """
    Replay a tool-call conversation WITHOUT sending the ``tools`` parameter.

    The conversation is user -> assistant tool call -> tool result. According
    to the original author's notes this variant worked in an earlier test.
    Prints the reply text and whether it contains "42", or the raw payload
    when the reply carries no choices.
    """

    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }],
            "content": None
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 42"
        }
    ]

    # NO tools param - this worked before
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                # NO tools param
                "stream": False,
                "max_tokens": 100
            }
        )

        result = response.json()
        # A missing or empty "choices" list is reported as an error instead
        # of raising IndexError on the [0] lookup.
        choices = result.get("choices")
        if choices:
            content = choices[0]["message"].get("content")
            print(f"\nNo tools param - Response: {content}")
            # content may be null (e.g. the reply is a tool call with no
            # text); guard so the membership test cannot raise TypeError.
            print(f"Contains 42: {content is not None and '42' in content}")
        else:
            print(f"\nNo tools param - Error: {result}")


def test_with_tools_param():
    """
    Replay a tool-call conversation WITH the ``tools`` parameter.

    The original author flagged this variant as the failing one. Prints the
    reply text and whether it contains "42". An error payload, or a reply
    without text content, is reported instead of raising.
    """

    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }],
            "content": None
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 42"
        }
    ]

    tools = [{
        "type": "function",
        "function": {
            "name": "calc",
            "description": "Calculator",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,  # WITH tools param
                "stream": False,
                "max_tokens": 100
            }
        )

        result = response.json()
        # Error replies carry no "choices"; print them like the sibling
        # tests do rather than failing with KeyError.
        choices = result.get("choices")
        if choices:
            content = choices[0]["message"].get("content")
            print(f"\nWith tools param - Response: {content}")
            # With tools enabled the model may reply with another tool call,
            # in which case content is null; avoid TypeError on the `in` test.
            print(f"Contains 42: {content is not None and '42' in content}")
        else:
            print(f"\nWith tools param - Error: {result}")


def test_without_assistant_tool_calls():
    """
    Send a tool result with no preceding assistant ``tool_calls`` message.

    Checks whether the assistant tool-call turn is what causes the issue by
    sending only user -> tool. Prints the reply text and whether it contains
    "42", or the raw payload when the reply carries no choices.
    """

    messages = [
        {"role": "user", "content": "The calculator returned this result"},
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "VALUE_IS_42"
        }
    ]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "stream": False,
                "max_tokens": 100
            }
        )

        result = response.json()
        # A missing or empty "choices" list is reported as an error instead
        # of raising IndexError on the [0] lookup.
        choices = result.get("choices")
        if choices:
            content = choices[0]["message"].get("content")
            print(f"\nNo assistant tool_calls - Response: {content}")
            # content may be null; guard so the membership test cannot raise
            # TypeError.
            print(f"Contains 42: {content is not None and '42' in content}")
        else:
            print(f"\nError: {result}")


if __name__ == "__main__":
    # Print a banner, then run the three comparison scenarios in order.
    # test_with_echo is defined in this file but is not part of this run.
    banner = "=" * 60
    print(banner)
    print("Debugging tool response visibility")
    print(banner)

    for scenario in (
        test_tool_only_message,
        test_with_tools_param,
        test_without_assistant_tool_calls,
    ):
        scenario()