Files
vllm-glm/tests/test_tool_debug.py
biondizzle aa4f667ab8 Add hf.py patch to force string content format for GLM models
- Tool response content was being dropped because vLLM detected
  'openai' content format incorrectly for GLM templates
- Added _is_glm_model() detection to force 'string' format
- Updated Dockerfile to include hf.py patch
- Added debug tests for tool visibility
2026-04-09 05:20:47 +00:00

222 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""
Debug test to see what prompt the model actually receives.
"""
import json
import os

import httpx
# Base URL of the OpenAI-compatible inference endpoint every test posts to.
API_BASE = "https://api.vultrinference.com/v1"
# Bearer token for the endpoint. It is read from the environment so the secret
# never lives in version control; the key that used to be hard-coded here must
# be treated as leaked and rotated. An empty string is used when the variable
# is unset, in which case the server is expected to reject the requests.
API_KEY = os.environ.get("VULTR_INFERENCE_API_KEY", "")
# Model identifier sent in the "model" field of each request.
MODEL = "zai-org/GLM-5.1-FP8"
def test_with_echo():
    """
    Send a tool-call conversation with ``echo`` and logprobs switched on.

    The conversation is user -> assistant tool call -> tool result, sent
    together with the matching tool schema. The decoded JSON response is
    printed in full for manual inspection; nothing is asserted or returned.
    """
    conversation = [
        {"role": "user", "content": "Call the test function"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "test_func", "arguments": "{}"},
            }],
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "VALUE_42",
        },
    ]
    tool_specs = [{
        "type": "function",
        "function": {
            "name": "test_func",
            "description": "A test function",
            "parameters": {"type": "object", "properties": {}},
        },
    }]

    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    # logprobs/top_logprobs and echo are sent in the hope that the response
    # reveals the rendered prompt.
    # NOTE(review): vLLM documents the chat endpoint's `echo` as prepending the
    # last same-role message, and prompt logprobs as a separate
    # `prompt_logprobs` field -- confirm these flags do what is intended.
    payload = {
        "model": MODEL,
        "messages": conversation,
        "tools": tool_specs,
        "stream": False,
        "max_tokens": 100,
        "logprobs": True,
        "top_logprobs": 1,
        "echo": True,
    }

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers=request_headers,
            json=payload,
        )
        result = response.json()

    print("Full response:")
    print(json.dumps(result, indent=2, ensure_ascii=False))
def test_tool_only_message():
    """
    Replay a tool-call conversation WITHOUT sending the ``tools`` parameter.

    Sends user -> assistant tool call -> tool result ("The answer is 42") and
    prints the model's reply together with whether the reply mentions ``42``.
    Per the original notes, this variant worked in a previous run.

    If the decoded response has no ``choices`` key, the raw payload is printed
    as an error instead. Nothing is returned or asserted.
    """
    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"},
            }],
            "content": None,
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 42",
        },
    ]

    # NO tools param - this worked before
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                # NO tools param
                "stream": False,
                "max_tokens": 100,
            },
        )
        result = response.json()

    if "choices" not in result:
        print(f"\nNo tools param - Error: {result}")
        return

    content = result["choices"][0]["message"]["content"]
    # The chat-completions schema allows a null content, and `'42' in None`
    # raises TypeError, so only run the substring check on real strings.
    contains_answer = isinstance(content, str) and "42" in content
    print(f"\nNo tools param - Response: {content}")
    print(f"Contains 42: {contains_answer}")
def test_with_tools_param():
    """
    Replay a tool-call conversation WITH the ``tools`` parameter.

    Sends the same user -> assistant tool call -> tool result conversation as
    the no-tools variant, plus the ``calc`` tool schema. Per the original
    notes, this is the variant that fails. Prints the model's reply together
    with whether the reply mentions ``42``.

    If the decoded response has no ``choices`` key (for example an error
    payload), the raw payload is printed instead of raising ``KeyError``.
    Nothing is returned or asserted.
    """
    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"},
            }],
            "content": None,
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 42",
        },
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "calc",
            "description": "Calculator",
            "parameters": {"type": "object", "properties": {}},
        },
    }]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,  # WITH tools param
                "stream": False,
                "max_tokens": 100,
            },
        )
        result = response.json()

    # Mirror the sibling tests: report an error payload rather than crashing
    # on the missing "choices" key.
    if "choices" not in result:
        print(f"\nWith tools param - Error: {result}")
        return

    content = result["choices"][0]["message"]["content"]
    # With tools enabled the model may answer with another tool call, in which
    # case content can be null; `'42' in None` would raise TypeError.
    contains_answer = isinstance(content, str) and "42" in content
    print(f"\nWith tools param - Response: {content}")
    print(f"Contains 42: {contains_answer}")
def test_without_assistant_tool_calls():
    """
    Send a tool result with no preceding assistant ``tool_calls`` message.

    Checks whether the assistant tool-call turn is what causes the problem by
    sending only user -> tool result ("VALUE_IS_42"); the ``tool_call_id``
    therefore refers to a call that is not present in the conversation. No
    ``tools`` parameter is sent. Prints the model's reply together with
    whether the reply mentions ``42``.

    If the decoded response has no ``choices`` key, the raw payload is printed
    as an error instead. Nothing is returned or asserted.
    """
    messages = [
        {"role": "user", "content": "The calculator returned this result"},
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "VALUE_IS_42",
        },
    ]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "stream": False,
                "max_tokens": 100,
            },
        )
        result = response.json()

    if "choices" not in result:
        print(f"\nError: {result}")
        return

    content = result["choices"][0]["message"]["content"]
    # The chat-completions schema allows a null content, and `'42' in None`
    # raises TypeError, so only run the substring check on real strings.
    contains_answer = isinstance(content, str) and "42" in content
    print(f"\nNo assistant tool_calls - Response: {content}")
    print(f"Contains 42: {contains_answer}")
if __name__ == "__main__":
    # Script entry point: print a banner, then run the three comparison
    # requests in order (no tools param, with tools param, tool result without
    # an assistant tool-call turn). test_with_echo is not part of this run.
    rule = "=" * 60
    print(rule)
    print("Debugging tool response visibility")
    print(rule)

    test_tool_only_message()
    test_with_tools_param()
    test_without_assistant_tool_calls()