[CI] Fix flaky tool_use chat completion tests with deterministic seed (#37027)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import pytest
|
||||
|
||||
from .utils import (
|
||||
MESSAGES_WITHOUT_TOOLS,
|
||||
SEED,
|
||||
WEATHER_TOOL,
|
||||
ServerConfig,
|
||||
ensure_system_prompt,
|
||||
@@ -27,6 +28,7 @@ async def test_chat_completion_without_tools(
|
||||
max_completion_tokens=150,
|
||||
model=model_name,
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
choice = chat_completion.choices[0]
|
||||
stop_reason = chat_completion.choices[0].finish_reason
|
||||
@@ -47,6 +49,7 @@ async def test_chat_completion_without_tools(
|
||||
max_completion_tokens=150,
|
||||
model=model_name,
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
chunks: list[str] = []
|
||||
@@ -97,6 +100,7 @@ async def test_chat_completion_with_tools(
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
choice = chat_completion.choices[0]
|
||||
stop_reason = chat_completion.choices[0].finish_reason
|
||||
@@ -118,6 +122,7 @@ async def test_chat_completion_with_tools(
|
||||
model=model_name,
|
||||
logprobs=False,
|
||||
tools=[WEATHER_TOOL],
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from .utils import (
|
||||
MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
|
||||
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
|
||||
SEARCH_TOOL,
|
||||
SEED,
|
||||
WEATHER_TOOL,
|
||||
ServerConfig,
|
||||
)
|
||||
@@ -39,6 +40,7 @@ async def test_parallel_tool_calls(
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
|
||||
choice = chat_completion.choices[0]
|
||||
@@ -76,6 +78,7 @@ async def test_parallel_tool_calls(
|
||||
max_completion_tokens=200,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
@@ -166,6 +169,7 @@ async def test_parallel_tool_calls_with_results(
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
|
||||
choice = chat_completion.choices[0]
|
||||
@@ -184,6 +188,7 @@ async def test_parallel_tool_calls_with_results(
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
@@ -229,6 +234,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
parallel_tool_calls=False,
|
||||
)
|
||||
|
||||
@@ -247,6 +253,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
|
||||
max_completion_tokens=200,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
parallel_tool_calls=False,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
@@ -10,6 +10,7 @@ from .utils import (
|
||||
MESSAGES_ASKING_FOR_TOOLS,
|
||||
MESSAGES_WITH_TOOL_RESPONSE,
|
||||
SEARCH_TOOL,
|
||||
SEED,
|
||||
WEATHER_TOOL,
|
||||
)
|
||||
|
||||
@@ -27,6 +28,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
|
||||
choice = chat_completion.choices[0]
|
||||
@@ -71,6 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
|
||||
max_completion_tokens=100,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
@@ -154,6 +157,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
)
|
||||
|
||||
choice = chat_completion.choices[0]
|
||||
@@ -171,6 +175,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
|
||||
model=model_name,
|
||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||
logprobs=False,
|
||||
seed=SEED,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@ def ensure_system_prompt(
|
||||
|
||||
# Universal args for all models go in ARGS below. This is also a good place to
|
||||
# change the type or KV cache quantization when testing locally.
|
||||
SEED = 42
|
||||
|
||||
ARGS: list[str] = [
|
||||
"--enable-auto-tool-choice",
|
||||
"--max-model-len",
|
||||
|
||||
Reference in New Issue
Block a user