Files
vllm-glm/tests/test_tool_visibility.py
biondizzle aa4f667ab8 Add hf.py patch to force string content format for GLM models
- Tool response content was being dropped because vLLM detected
  'openai' content format incorrectly for GLM templates
- Added _is_glm_model() detection to force 'string' format
- Updated Dockerfile to include hf.py patch
- Added debug tests for tool visibility
2026-04-09 05:20:47 +00:00

201 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Minimal test - is the tool response content being passed to the model?
"""
import json
import os

import httpx

# Endpoint and model under test (Vultr-hosted GLM with vLLM serving).
API_BASE = "https://api.vultrinference.com/v1"
# SECURITY: prefer the environment over a key committed to source control.
# The literal fallback keeps the script runnable as before, but this key
# is exposed in the repo and should be rotated.
API_KEY = os.environ.get("VULTR_API_KEY", "26DN7PNUB3YRBEPCDNMXKKD6ZODMETRSMOZQ")
MODEL = "zai-org/GLM-5.1-FP8"
def test_direct_prompt():
    """Check whether a role="tool" message's content reaches the model.

    Sends a standard OpenAI-style tool-calling conversation
    (user -> assistant tool_call -> tool result) and asks the model what
    the function returned. If the reply contains UNIQUE_MARKER_42, the
    tool response content survived the chat template; if not, it was
    dropped — the bug this script diagnoses.

    NOTE(review): GLM chat templates reportedly wrap tool output in
    <observations> tags — see test_tool_response_as_observation_format.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I'll call the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "UNIQUE_MARKER_42",
        },
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
def test_fake_tool_response_in_user_message():
    """Control experiment: deliver the tool result as a user message.

    Replaces the role="tool" message with a plain user message carrying
    the same marker. If the model can echo UNIQUE_MARKER_42 here but not
    in test_direct_prompt, the loss is specific to role="tool" handling
    in the chat template, not to the model itself.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        # Instead of role="tool", use user message
        {"role": "user", "content": "The function returned: UNIQUE_MARKER_42"},
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"\nUser message hack - Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
def test_tool_response_as_observation_format():
    """Variant: pre-wrap the tool result in GLM's <observations> tags.

    Same conversation as test_direct_prompt, but the role="tool" content
    is already formatted as <observations>...</observations>. If the
    marker becomes visible only in this variant, the chat template is
    failing to apply that wrapping itself.
    """
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"},
            }],
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "<observations>UNIQUE_MARKER_42</observations>",
        },
    ]
    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100,
            },
        )
    result = response.json()
    if "choices" in result:
        content = result["choices"][0]["message"]["content"]
        print(f"\nWith <observations> tags - Model response: {content}")
        # content may be None when the model answers with tool_calls only;
        # guard so the membership test cannot raise TypeError.
        print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
    else:
        print(f"Error: {result}")
if __name__ == "__main__":
    print("Testing tool response visibility")
    print("=" * 60)
    # Run the three diagnostics in order: baseline, user-message control,
    # then the pre-wrapped <observations> variant.
    for check in (
        test_direct_prompt,
        test_fake_tool_response_in_user_message,
        test_tool_response_as_observation_format,
    ):
        check()