add test_devstral.py, restore chat_template_kwargs+logprobs to all tests (vLLM-compat spec)
This commit is contained in:
479
test_devstral.py
Normal file
479
test_devstral.py
Normal file
@@ -0,0 +1,479 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test suite for mistralai/Devstral-2-123B-Instruct-2512 via SGLang middleware.
|
||||
|
||||
These tests send EXACTLY what OpenClaw would send to vLLM — including
|
||||
chat_template_kwargs, logprobs, weird tool schemas, the works.
|
||||
The middleware's job is to strip/fix all of it so SGLang doesn't choke.
|
||||
|
||||
Architecture: this test → middleware (strips bad params) → SGLang
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
import httpx
|
||||
from datetime import datetime
|
||||
|
||||
# Point at the middleware, NOT SGLang directly
API_BASE = os.environ.get("DEVSTRAL_API_BASE", "http://127.0.0.1:8002/v1")
# Sent as a Bearer token by make_client(); value is not validated here
API_KEY = os.environ.get("DEVSTRAL_API_KEY", "whatever")
# Served model name passed in every request payload
MODEL = os.environ.get("DEVSTRAL_MODEL", "mistralai/Devstral-2-123B-Instruct-2512")

# Outcome dicts appended by record(); printed as the summary in main()
RESULTS = []
|
||||
|
||||
|
||||
def ts():
    """Return the current wall-clock time as ``HH:MM:SS.mmm`` for log prefixes."""
    now = datetime.now()
    # strftime %f gives microseconds; drop the last three digits for millis
    return now.strftime("%H:%M:%S.%f")[:-3]
|
||||
|
||||
|
||||
def record(name, ok, detail=""):
    """Print a PASS/FAIL line for *name* and append the outcome to RESULTS."""
    label = "✓ PASS" if ok else "✗ FAIL"
    print(f"\n{label}: {name}")
    if detail:
        print(f" {detail}")
    RESULTS.append({"name": name, "pass": ok, "detail": detail})
|
||||
|
||||
|
||||
def make_client():
    """Build an httpx client preconfigured with auth + JSON headers.

    Uses a generous 120 s timeout since large-model completions can be slow.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    return httpx.Client(timeout=120.0, headers=headers)
|
||||
|
||||
|
||||
# ── 1. Basic non-streaming chat ──────────────────────────────
|
||||
|
||||
def test_basic_nonstream():
    """Smoke test: one user message, no streaming, no tools."""
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[{ts()}] TEST: Basic non-streaming chat")
    print(f"{sep}")

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": "Say hello in one word."}],
        "stream": False,
        "max_tokens": 32,
    }
    with make_client() as c:
        r = c.post(f"{API_BASE}/chat/completions", json=payload)
        print(f"[{ts()}] Status: {r.status_code}")
        body = r.json()
        if r.status_code != 200:
            print(f"[{ts()}] Error: {json.dumps(body, indent=2)}")
            record("basic non-stream", False, f"HTTP {r.status_code}: {json.dumps(body)[:200]}")
            return
        content = body["choices"][0]["message"]["content"]
        print(f"[{ts()}] Reply: {content[:100]}")
        record("basic non-stream", True, f"Got: {content[:80]}")
|
||||
|
||||
|
||||
# ── 2. Basic streaming chat ──────────────────────────────────
|
||||
|
||||
def test_basic_stream():
    """Stream a short completion over SSE and reassemble the content deltas."""
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[{ts()}] TEST: Basic streaming chat")
    print(f"{sep}")

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": "Count from 1 to 5."}],
        "stream": True,
        "max_tokens": 64,
    }
    with make_client() as c:
        with c.stream("POST", f"{API_BASE}/chat/completions", json=payload) as r:
            print(f"[{ts()}] Status: {r.status_code}")
            if r.status_code != 200:
                body = "".join(r.iter_lines())
                print(f"[{ts()}] Error: {body[:300]}")
                record("basic stream", False, f"HTTP {r.status_code}")
                return
            pieces = []
            for raw in r.iter_lines():
                # Skip keep-alives and the terminal sentinel
                if not raw or raw == "data: [DONE]":
                    continue
                if not raw.startswith("data: "):
                    continue
                try:
                    chunk = json.loads(raw[6:])
                except json.JSONDecodeError:
                    continue
                choices = chunk.get("choices")
                if not choices:
                    continue
                text = choices[0].get("delta", {}).get("content")
                if text:
                    pieces.append(text)
            full = "".join(pieces)
            print(f"[{ts()}] Reply: {full[:100]}")
            record("basic stream", True, f"Got: {full[:80]}")
|
||||
|
||||
|
||||
# ── 3. Tool call — non-streaming (vLLM-style tool schema) ───
|
||||
|
||||
def test_toolcall_nonstream():
    """Ask a weather question with a standard vLLM-style tool schema and
    verify the model answers with a tool_calls entry (non-streaming)."""
    print(f"\n{'='*60}")
    print(f"[{ts()}] TEST: Tool call non-streaming (vLLM-style)")
    print(f"{'='*60}")

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City, e.g. 'Tokyo'"}
                },
                "required": ["location"]
            }
        }
    }]

    with make_client() as c:
        r = c.post(f"{API_BASE}/chat/completions", json={
            "model": MODEL,
            "messages": [{"role": "user", "content": "What's the weather in Tokyo?"}],
            "tools": tools,
            "tool_choice": "auto",
            "stream": False,
            "max_tokens": 256,
        })
        print(f"[{ts()}] Status: {r.status_code}")
        body = r.json()
        if r.status_code != 200:
            print(f"[{ts()}] Error: {json.dumps(body, indent=2)}")
            record("tool call non-stream", False, f"HTTP {r.status_code}: {json.dumps(body)[:200]}")
            return
        msg = body["choices"][0]["message"]
        if msg.get("tool_calls"):
            tc = msg["tool_calls"][0]
            print(f"[{ts()}] Tool: {tc['function']['name']}, args: {tc['function']['arguments']}")
            record("tool call non-stream", True, f"Got tool call: {tc['function']['name']}")
        else:
            # OpenAI-style responses may carry "content": null; .get("content", "")
            # would return None (the key exists) and crash on the slice below.
            content = msg.get("content") or ""
            print(f"[{ts()}] No tool call. Content: {content[:200]}")
            record("tool call non-stream", False, "Model did not call the tool")
|
||||
|
||||
|
||||
# ── 4. Tool call — streaming ────────────────────────────────
|
||||
|
||||
def test_toolcall_stream():
    """Same tool request as the non-streaming case, but over SSE; the
    tool-call name and arguments are reassembled from the deltas."""
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[{ts()}] TEST: Tool call streaming")
    print(f"{sep}")

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City, e.g. 'Tokyo'"}
                },
                "required": ["location"]
            }
        }
    }]

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": "What's the weather in Tokyo?"}],
        "tools": tools,
        "tool_choice": "auto",
        "stream": True,
        "max_tokens": 256,
    }
    with make_client() as c:
        with c.stream("POST", f"{API_BASE}/chat/completions", json=payload) as r:
            print(f"[{ts()}] Status: {r.status_code}")
            if r.status_code != 200:
                body = "".join(r.iter_lines())
                print(f"[{ts()}] Error: {body[:300]}")
                record("tool call stream", False, f"HTTP {r.status_code}")
                return

            tool_name = None
            accumulated_args = ""
            content_parts = ""
            for raw in r.iter_lines():
                if not raw or raw == "data: [DONE]":
                    continue
                if not raw.startswith("data: "):
                    continue
                try:
                    chunk = json.loads(raw[6:])
                except json.JSONDecodeError:
                    continue
                if not chunk.get("choices"):
                    continue
                delta = chunk["choices"][0].get("delta", {})
                # Tool-call deltas arrive split: name first, args in fragments
                for tc in delta.get("tool_calls") or []:
                    fn = tc.get("function", {})
                    if fn.get("name"):
                        tool_name = fn["name"]
                    if fn.get("arguments"):
                        accumulated_args += fn["arguments"]
                if delta.get("content"):
                    content_parts += delta["content"]

        if tool_name:
            print(f"[{ts()}] Tool: {tool_name}, args: {accumulated_args}")
            record("tool call stream", True, f"Got tool call: {tool_name}")
        else:
            print(f"[{ts()}] No tool call. Content: {content_parts[:200]}")
            record("tool call stream", False, "Model did not call the tool")
|
||||
|
||||
|
||||
# ── 5. Full tool response flow (non-streaming) ──────────────
|
||||
|
||||
def test_tool_response_flow():
    """Two-round tool flow: obtain a tool call, feed back a fabricated tool
    result, and check the final answer uses it (the "22" temperature)."""
    print(f"\n{'='*60}")
    print(f"[{ts()}] TEST: Full tool response flow (non-streaming)")
    print(f"{'='*60}")

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City, e.g. 'Tokyo'"}
                },
                "required": ["location"]
            }
        }
    }]

    messages = [{"role": "user", "content": "What's the weather in Tokyo?"}]

    with make_client() as c:
        # Round 1: model should request the get_weather tool.
        r = c.post(f"{API_BASE}/chat/completions", json={
            "model": MODEL,
            "messages": messages,
            "tools": tools,
            "tool_choice": "auto",
            "stream": False,
            "max_tokens": 256,
        })
        body = r.json()
        if r.status_code != 200:
            record("tool response flow", False, f"Step 1 failed: HTTP {r.status_code}")
            return
        msg = body["choices"][0]["message"]
        if not msg.get("tool_calls"):
            record("tool response flow", False, "No tool call in step 1")
            return

        tc = msg["tool_calls"][0]
        tc_id = tc["id"]
        print(f"[{ts()}] Tool call: {tc['function']['name']} (id={tc_id})")

        # Echo the assistant turn back verbatim, then answer it with a
        # role=tool message carrying a fabricated result.
        messages.append(msg)
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": json.dumps({"location": "Tokyo", "temperature": "22°C", "condition": "Partly cloudy"}),
        })

        # Round 2: model should incorporate the tool result into its answer.
        r2 = c.post(f"{API_BASE}/chat/completions", json={
            "model": MODEL,
            "messages": messages,
            "tools": tools,
            "stream": False,
            "max_tokens": 256,
        })
        body2 = r2.json()
        if r2.status_code != 200:
            print(f"[{ts()}] Step 2 error: {json.dumps(body2, indent=2)}")
            record("tool response flow", False, f"Step 2 failed: HTTP {r2.status_code}")
            return

        # "content" may be null (e.g. the model issues another tool call);
        # coerce to "" so the slice and "in" checks below cannot raise.
        final = body2["choices"][0]["message"].get("content") or ""
        print(f"[{ts()}] Final: {final[:200]}")
        ok = "22" in final
        record("tool response flow", ok, f"Model used tool result: {'yes' if ok else 'no'} — {final[:100]}")
|
||||
|
||||
|
||||
# ── 6. Param sweep — everything OpenClaw/vLLM sends ─────────
|
||||
|
||||
def test_param_sweep():
    """
    Sends EVERY param that OpenClaw or vLLM might include.
    The middleware must strip/fix the ones SGLang rejects.
    """
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[{ts()}] TEST: Parameter sweep (vLLM-compat, middleware must fix)")
    print(f"{sep}")

    base_req = {
        "model": MODEL,
        "messages": [{"role": "user", "content": "Say hi."}],
        "stream": False,
        "max_tokens": 32,
    }

    # Params that OpenClaw/vLLM might send — some SGLang rejects
    extra_params = [
        ("chat_template_kwargs", {"enable_thinking": False}),
        ("guided_json", None),
        ("guided_regex", None),
        ("response_format", {"type": "json_object"}),
        ("n", 1),
        ("presence_penalty", 0.0),
        ("frequency_penalty", 0.0),
        ("top_p", 1.0),
        ("temperature", 0.7),
        ("seed", 42),
        ("stop", ["\n"]),
        ("logprobs", True),
        ("top_logprobs", 5),
    ]

    with make_client() as c:
        # baseline request without any of the extras
        baseline = c.post(f"{API_BASE}/chat/completions", json=base_req)
        print(f"[{ts()}] Baseline: {baseline.status_code}")

        for name, val in extra_params:
            resp = c.post(f"{API_BASE}/chat/completions", json={**base_req, name: val})
            ok = resp.status_code == 200
            marker = "✓" if ok else "✗"
            detail = ""
            if not ok:
                try:
                    detail = resp.json().get("error", {}).get("message", "")[:100]
                except Exception:
                    detail = resp.text[:100]
            print(f"[{ts()}] {marker} {name}={val!r} → HTTP {resp.status_code} {detail}")
            # Only failures are recorded; successes just log to stdout
            if not ok:
                record(f"param sweep: {name}", False, f"HTTP {resp.status_code} with {name}={val!r}: {detail}")
|
||||
|
||||
|
||||
# ── 7. OpenClaw-style tool schema (the one that caused 400) ─
|
||||
|
||||
def test_openclaw_tool_schema():
    """
    Reproduce the exact tool schema that OpenClaw sends which has
    parameters.properties = [] instead of {}. Middleware must fix it.
    """
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[{ts()}] TEST: OpenClaw-style tool schema (bad properties)")
    print(f"{sep}")

    # This is the exact shape OpenClaw sends for tools with no params
    tools = [{
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web",
            "parameters": {
                "type": "object",
                "properties": []  # <-- THIS is what causes the 400
            }
        }
    }]

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": "Search for cats"}],
        "tools": tools,
        "tool_choice": "auto",
        "stream": False,
        "max_tokens": 128,
    }
    with make_client() as c:
        r = c.post(f"{API_BASE}/chat/completions", json=payload)
        print(f"[{ts()}] Status: {r.status_code}")
        body = r.json()
        if r.status_code != 200:
            print(f"[{ts()}] Error: {json.dumps(body, indent=2)[:300]}")
            record("openclaw tool schema", False, f"HTTP {r.status_code}: {json.dumps(body)[:200]}")
            return
        print(f"[{ts()}] Success — middleware fixed the bad schema")
        record("openclaw tool schema", True, "Middleware fixed parameters.properties=[] → {}")
|
||||
|
||||
|
||||
# ── 8. OpenClaw full payload (chat_template_kwargs + tools) ─
|
||||
|
||||
def test_openclaw_full_payload():
    """
    The kitchen sink: chat_template_kwargs + logprobs + tools with bad schemas.
    Exactly what OpenClaw sends through the pipe.
    """
    print(f"\n{'='*60}")
    print(f"[{ts()}] TEST: OpenClaw full payload (kitchen sink)")
    print(f"{'='*60}")

    tools = [{
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web using DuckDuckGo.",
            "parameters": {
                "type": "object",
                "properties": []  # Bad — middleware must fix
            }
        }
    }]

    with make_client() as c:
        r = c.post(f"{API_BASE}/chat/completions", json={
            "model": MODEL,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Search for the weather in NYC"},
            ],
            "tools": tools,
            "tool_choice": "auto",
            "stream": False,
            "max_tokens": 256,
            "chat_template_kwargs": {"enable_thinking": False},  # Bad — middleware must strip
            "logprobs": True,  # Bad — middleware must strip
            "top_logprobs": 5,  # Bad — middleware must strip
        })
        print(f"[{ts()}] Status: {r.status_code}")
        body = r.json()
        if r.status_code != 200:
            print(f"[{ts()}] Error: {json.dumps(body, indent=2)[:300]}")
            record("openclaw full payload", False, f"HTTP {r.status_code}: {json.dumps(body)[:200]}")
            return
        msg = body["choices"][0]["message"]
        print(f"[{ts()}] Success — middleware cleaned everything")
        if msg.get("tool_calls"):
            tc = msg["tool_calls"][0]
            print(f"[{ts()}] Tool call: {tc['function']['name']}")
        else:
            # "content" may be present-but-null; coerce so the slice can't raise
            print(f"[{ts()}] No tool call, content: {(msg.get('content') or '')[:100]}")
        record("openclaw full payload", True, "Full OpenClaw payload survived the middleware")
|
||||
|
||||
|
||||
# ── Main ─────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Run every test in order, then print a pass/fail summary table."""
    sep = "=" * 60
    print(f"\n{sep}")
    print("Devstral-2-123B Test Suite (vLLM-compat, via middleware)")
    print(f"API: {API_BASE}")
    print(f"Model: {MODEL}")
    print(f"{sep}")

    suite = [
        test_basic_nonstream,
        test_basic_stream,
        test_toolcall_nonstream,
        test_toolcall_stream,
        test_tool_response_flow,
        test_param_sweep,
        test_openclaw_tool_schema,
        test_openclaw_full_payload,
    ]
    for test in suite:
        test()

    print(f"\n\n{sep}")
    print("FINAL RESULTS")
    print(f"{sep}")
    for entry in RESULTS:
        mark = "✓" if entry["pass"] else "✗"
        print(f" {mark} {entry['name']}: {entry['detail']}")
    passed = sum(1 for entry in RESULTS if entry["pass"])
    print(f"\n {passed}/{len(RESULTS)} passed")
    print(f"{sep}")


if __name__ == "__main__":
    main()
|
||||
@@ -91,7 +91,10 @@ def test_streaming_tool_call_with_code():
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": True,
|
||||
"max_tokens": 4096
|
||||
"max_tokens": 4096,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
) as response:
|
||||
print(f"[{timestamp()}] Response status: {response.status_code}")
|
||||
@@ -242,7 +245,10 @@ def test_streaming_tool_call_with_json():
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": True,
|
||||
"max_tokens": 2048
|
||||
"max_tokens": 2048,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
) as response:
|
||||
for line in response.iter_lines():
|
||||
@@ -328,7 +334,10 @@ def test_non_streaming_tool_call():
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": False,
|
||||
"max_tokens": 1024
|
||||
"max_tokens": 1024,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -63,7 +63,10 @@ def test_simple_tool_response():
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"stream": False,
|
||||
"max_tokens": 256
|
||||
"max_tokens": 256,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
@@ -129,7 +132,10 @@ def test_without_tools_param():
|
||||
"messages": messages,
|
||||
# No tools param
|
||||
"stream": False,
|
||||
"max_tokens": 256
|
||||
"max_tokens": 256,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
@@ -211,7 +217,10 @@ def test_different_content_formats():
|
||||
"messages": msgs,
|
||||
"tools": tools,
|
||||
"stream": False,
|
||||
"max_tokens": 128
|
||||
"max_tokens": 128,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -94,7 +94,10 @@ def test_tool_call_response_flow(streaming: bool = True):
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": True,
|
||||
"max_tokens": 512
|
||||
"max_tokens": 512,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
) as response:
|
||||
print(f"[{timestamp()}] Response status: {response.status_code}")
|
||||
@@ -152,7 +155,10 @@ def test_tool_call_response_flow(streaming: bool = True):
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": False,
|
||||
"max_tokens": 512
|
||||
"max_tokens": 512,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
@@ -224,7 +230,10 @@ def test_tool_call_response_flow(streaming: bool = True):
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"stream": True,
|
||||
"max_tokens": 512
|
||||
"max_tokens": 512,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
) as response:
|
||||
for line in response.iter_lines():
|
||||
@@ -258,7 +267,10 @@ def test_tool_call_response_flow(streaming: bool = True):
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"stream": False,
|
||||
"max_tokens": 512
|
||||
"max_tokens": 512,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
@@ -358,7 +370,10 @@ def test_tool_response_with_debug_info():
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"stream": False,
|
||||
"max_tokens": 256
|
||||
"max_tokens": 256,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
@@ -401,7 +416,10 @@ def test_tool_response_with_debug_info():
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"stream": False,
|
||||
"max_tokens": 256
|
||||
"max_tokens": 256,
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"logprobs": True,
|
||||
"top_logprobs": 5
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user