Add hf.py patch to force string content format for GLM models
- Tool response content was being dropped because vLLM incorrectly detected the 'openai' content format for GLM chat templates.
- Added _is_glm_model() detection to force the 'string' content format.
- Updated the Dockerfile to include the hf.py patch.
- Added debug tests for tool-response visibility.
This commit is contained in:
221
tests/test_tool_debug.py
Normal file
221
tests/test_tool_debug.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python3
"""
Debug test to see what prompt the model actually receives.
"""

import os

import httpx
import json

API_BASE = "https://api.vultrinference.com/v1"
# Security fix: the API key was previously hard-coded here, leaking a live
# credential into version control. Read it from the environment instead;
# the old key must be rotated regardless.
API_KEY = os.environ.get("VULTR_API_KEY", "")
MODEL = "zai-org/GLM-5.1-FP8"
def test_with_echo():
    """
    Test with echo=True to see the prompt tokens.
    """

    # One complete tool round-trip; the tool result "VALUE_42" is the
    # marker we hope to spot in the echoed prompt.
    tool_call = {
        "id": "call_123",
        "type": "function",
        "function": {"name": "test_func", "arguments": "{}"}
    }
    messages = [
        {"role": "user", "content": "Call the test function"},
        {"role": "assistant", "tool_calls": [tool_call]},
        {"role": "tool", "tool_call_id": "call_123", "content": "VALUE_42"}
    ]

    tools = [{
        "type": "function",
        "function": {
            "name": "test_func",
            "description": "A test function",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    # Request prompt logprobs plus echo so the server may return the exact
    # prompt tokens it built from the chat template.
    payload = {
        "model": MODEL,
        "messages": messages,
        "tools": tools,
        "stream": False,
        "max_tokens": 100,
        "logprobs": True,
        "top_logprobs": 1,
        "echo": True  # Return prompt tokens
    }
    auth_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    with httpx.Client(timeout=60.0) as client:
        # Try to get prompt logprobs which might show us the prompt
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers=auth_headers,
            json=payload
        )

        result = response.json()

        print("Full response:")
        print(json.dumps(result, indent=2, ensure_ascii=False))
def test_tool_only_message():
    """
    Test if a tool-only message (no tools param) works.
    This is what worked in the previous test.
    """

    conversation = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }],
            "content": None
        },
        {"role": "tool", "tool_call_id": "call_123", "content": "The answer is 42"}
    ]

    # NO tools param - this worked before
    body = {
        "model": MODEL,
        "messages": conversation,
        # NO tools param
        "stream": False,
        "max_tokens": 100
    }

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json=body
        )

        result = response.json()
        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"\nNo tools param - Response: {content}")
            print(f"Contains 42: {'42' in content}")
        else:
            print(f"\nNo tools param - Error: {result}")
def test_with_tools_param():
    """
    Test WITH tools param - this is what fails.
    """

    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "calc", "arguments": "{}"}
            }],
            "content": None
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "The answer is 42"
        }
    ]

    tools = [{
        "type": "function",
        "function": {
            "name": "calc",
            "description": "Calculator",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,  # WITH tools param
                "stream": False,
                "max_tokens": 100
            }
        )

        result = response.json()
        # Bug fix: the sibling tests guard against an API error payload
        # (which has no "choices" key); this one previously indexed it
        # unconditionally and would raise KeyError on errors. Also, when the
        # model replies with tool_calls only, content is None, so guard the
        # membership test against a TypeError.
        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"\nWith tools param - Response: {content}")
            print(f"Contains 42: {'42' in (content or '')}")
        else:
            print(f"\nWith tools param - Error: {result}")
def test_without_assistant_tool_calls():
    """
    Test if the issue is the assistant message with tool_calls.
    What if we just send user -> tool response?
    """

    # A user turn followed directly by a tool turn - no intervening
    # assistant message carrying tool_calls.
    history = [
        {"role": "user", "content": "The calculator returned this result"},
        {"role": "tool", "tool_call_id": "call_123", "content": "VALUE_IS_42"}
    ]

    request_json = {
        "model": MODEL,
        "messages": history,
        "stream": False,
        "max_tokens": 100
    }

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json=request_json
        )

        result = response.json()
        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"\nNo assistant tool_calls - Response: {content}")
            print(f"Contains 42: {'42' in content}")
        else:
            print(f"\nError: {result}")
if __name__ == "__main__":
    # Run each debug scenario in sequence under a banner.
    banner = "=" * 60
    print(banner)
    print("Debugging tool response visibility")
    print(banner)

    for scenario in (
        test_tool_only_message,
        test_with_tools_param,
        test_without_assistant_tool_calls,
    ):
        scenario()
200
tests/test_tool_visibility.py
Normal file
200
tests/test_tool_visibility.py
Normal file
@@ -0,0 +1,200 @@
|
||||
#!/usr/bin/env python3
"""
Minimal test - is the tool response content being passed to the model?
"""

import os

import httpx
import json

API_BASE = "https://api.vultrinference.com/v1"
# Security fix: the API key was previously hard-coded here, leaking a live
# credential into version control. Read it from the environment instead;
# the old key must be rotated regardless.
API_KEY = os.environ.get("VULTR_API_KEY", "")
MODEL = "zai-org/GLM-5.1-FP8"
def test_direct_prompt():
    """
    If we could send a direct prompt, what would it look like?

    GLM-5.1 expects tool responses in <observations> tags:
    <observations>{"result": "42"}</observations>

    Let's test if the model can see content in that format.
    """

    # Simulate what the prompt SHOULD look like after chat template
    messages = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I'll call the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "UNIQUE_MARKER_42"
        }
    ]

    tools = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    with httpx.Client(timeout=60.0) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 100
            }
        )

        result = response.json()

        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"Model response: {content}")
            # Bug fix: content is None when the model answers with
            # tool_calls only; guard the membership test so it cannot
            # raise TypeError.
            print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in (content or '')}")
        else:
            print(f"Error: {result}")
def test_fake_tool_response_in_user_message():
    """
    Test: What if we put the tool response in a user message instead?
    This bypasses the role="tool" handling entirely.
    """

    chat = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"}
            }]
        },
        # Instead of role="tool", use user message
        {"role": "user", "content": "The function returned: UNIQUE_MARKER_42"}
    ]

    tool_defs = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    request_body = {
        "model": MODEL,
        "messages": chat,
        "tools": tool_defs,
        "stream": False,
        "max_tokens": 100
    }

    with httpx.Client(timeout=60.0) as client:
        reply = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json=request_body
        )

        result = reply.json()

        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"\nUser message hack - Model response: {content}")
            print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in content}")
        else:
            print(f"Error: {result}")
def test_tool_response_as_observation_format():
    """
    Test: What if we format the tool response in the GLM expected format?
    GLM expects: <observations>content</observations>
    """

    # Try putting the observations tag in the content
    chat = [
        {"role": "user", "content": "What did the function return?"},
        {
            "role": "assistant",
            "content": "I called the function.",
            "tool_calls": [{
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_value", "arguments": "{}"}
            }]
        },
        {
            "role": "tool",
            "tool_call_id": "call_123",
            "content": "<observations>UNIQUE_MARKER_42</observations>"
        }
    ]

    tool_defs = [{
        "type": "function",
        "function": {
            "name": "get_value",
            "description": "Get a value",
            "parameters": {"type": "object", "properties": {}}
        }
    }]

    request_body = {
        "model": MODEL,
        "messages": chat,
        "tools": tool_defs,
        "stream": False,
        "max_tokens": 100
    }

    with httpx.Client(timeout=60.0) as client:
        reply = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json=request_body
        )

        result = reply.json()

        if "choices" in result:
            content = result["choices"][0]["message"]["content"]
            print(f"\nWith <observations> tags - Model response: {content}")
            print(f"Contains UNIQUE_MARKER_42: {'UNIQUE_MARKER_42' in content}")
        else:
            print(f"Error: {result}")
if __name__ == "__main__":
    # Run every visibility probe in sequence.
    print("Testing tool response visibility")
    print("=" * 60)

    for probe in (
        test_direct_prompt,
        test_fake_tool_response_in_user_message,
        test_tool_response_as_observation_format,
    ):
        probe()
Reference in New Issue
Block a user