Files
vllm-glm/tests/test_tool_visibility.py
biondizzle aa4f667ab8 Add hf.py patch to force string content format for GLM models
- Tool response content was being dropped because vLLM detected
  'openai' content format incorrectly for GLM templates
- Added _is_glm_model() detection to force 'string' format
- Updated Dockerfile to include hf.py patch
- Added debug tests for tool visibility
2026-04-09 05:20:47 +00:00

201 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Minimal test - is the tool response content being passed to the model?
"""
import json
import os

import httpx

# Endpoint and model under test (Vultr-hosted GLM with vLLM serving).
API_BASE = "https://api.vultrinference.com/v1"
# SECURITY: prefer the environment over a key committed to source control.
# The literal fallback keeps the script runnable as before, but this key
# is exposed in the repo and should be rotated.
API_KEY = os.environ.get("VULTR_API_KEY", "26DN7PNUB3YRBEPCDNMXKKD6ZODMETRSMOZQ")
MODEL = "zai-org/GLM-5.1-FP8"
def test_direct_prompt():
    """Check whether a role="tool" message's content reaches the model.

    Sends a standard OpenAI-style tool-calling conversation
    (user -> assistant tool_call -> tool result) and asks the model what
    the function returned. If the reply contains UNIQUE_MARKER_42, the
    tool response content survived the chat template; if not, it was
    dropped — the bug this script diagnoses.

    NOTE(review): GLM chat templates reportedly wrap tool output in
    <observations> tags — see test_tool_response_as_observation_format.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I'll call the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "UNIQUE_MARKER_42",
        },
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
def test_fake_tool_response_in_user_message():
    """Control experiment: deliver the tool result as a user message.

    Replaces the role="tool" message with a plain user message carrying
    the same marker. If the model can echo UNIQUE_MARKER_42 here but not
    in test_direct_prompt, the loss is specific to role="tool" handling
    in the chat template, not to the model itself.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        # Instead of role="tool", use user message
        {"role": "user", "content": "The function returned: UNIQUE_MARKER_42"},
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"\nUser message hack - Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
def test_tool_response_as_observation_format():
    """Variant: pre-wrap the tool result in GLM's <observations> tags.

    Same conversation as test_direct_prompt, but the role="tool" content
    is already formatted as <observations>...</observations>. If the
    marker becomes visible only in this variant, the chat template is
    failing to apply that wrapping itself.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "<observations>UNIQUE_MARKER_42</observations>",
        },
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"\nWith <observations> tags - Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
if __name__ == "__main__":
    print("Testing tool response visibility")
    print("=" * 60)
    # Run the three diagnostics in order: baseline, user-message control,
    # then the pre-wrapped <observations> variant.
    for check in (
        test_direct_prompt,
        test_fake_tool_response_in_user_message,
        test_tool_response_as_observation_format,
    ):
        check()