[CI] Stabilize test_no_args_tool_call and add ROCm-specific server args (#36107)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -9,6 +9,8 @@ import openai # use the official client for correctness check
import pytest
import pytest_asyncio

from vllm.platforms import current_platform

# downloading lora to test lora requests
from ...utils import RemoteOpenAIServer

@@ -139,8 +141,19 @@ def server():
"qwen3",
"--gpu-memory-utilization",
"0.4",
"--enforce-eager",
]

rocm_args = {
"--max-num-seqs": "1",
"--no-enable-prefix-caching": None,
}
if current_platform.is_rocm():
for k, v in rocm_args.items():
args.append(k)
if v is not None:
args.append(v)

with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server

@@ -294,7 +307,10 @@ async def test_no_args_tool_call(
"type": "function",
"function": {
"name": "get_current_time",
"description": "Get the current date and time. No parameters needed.",
"description": (
"Get the current date and time. Call this when the user "
"asks what time or date it is. No parameters needed."
),
"parameters": {
"type": "object",
"properties": {}, # No parameters
@@ -303,10 +319,28 @@
},
}
]
messages = [{"role": "user", "content": "What time is it now?"}]
messages = [
{
"role": "system",
"content": (
"You are a helpful assistant. Always use the available tools "
"when relevant, and reply with a short sentence after "
"receiving a tool result."
),
},
{"role": "user", "content": "What time is it now?"},
]

shared_kwargs = dict(
model=model_name,
temperature=0.0,
seed=42,
extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)

# Step 2: Send user message and let model decide whether to call the tool
response = await client.chat.completions.create(
model=model_name,
**shared_kwargs,
messages=messages,
tools=tools,
tool_choice="auto", # Let model choose automatically
@@ -334,11 +368,15 @@ async def test_no_args_tool_call(
)
# Step 5: Send tool result back to model to continue conversation
final_response = await client.chat.completions.create(
model=model_name,
**shared_kwargs,
messages=messages,
max_completion_tokens=128,
)
# Output final natural language response
assert final_response.choices[0].message.content is not None
assert (
final_response.choices[0].message.content is not None
and final_response.choices[0].message.content.strip() != ""
)

else:
# No tool called — just print model's direct reply
Reference in New Issue
Block a user