[CI] Fix flaky tool_use chat completion tests with deterministic seed (#37027)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import pytest
|
|||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
MESSAGES_WITHOUT_TOOLS,
|
MESSAGES_WITHOUT_TOOLS,
|
||||||
|
SEED,
|
||||||
WEATHER_TOOL,
|
WEATHER_TOOL,
|
||||||
ServerConfig,
|
ServerConfig,
|
||||||
ensure_system_prompt,
|
ensure_system_prompt,
|
||||||
@@ -27,6 +28,7 @@ async def test_chat_completion_without_tools(
|
|||||||
max_completion_tokens=150,
|
max_completion_tokens=150,
|
||||||
model=model_name,
|
model=model_name,
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
stop_reason = chat_completion.choices[0].finish_reason
|
stop_reason = chat_completion.choices[0].finish_reason
|
||||||
@@ -47,6 +49,7 @@ async def test_chat_completion_without_tools(
|
|||||||
max_completion_tokens=150,
|
max_completion_tokens=150,
|
||||||
model=model_name,
|
model=model_name,
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
chunks: list[str] = []
|
chunks: list[str] = []
|
||||||
@@ -97,6 +100,7 @@ async def test_chat_completion_with_tools(
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL],
|
tools=[WEATHER_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
stop_reason = chat_completion.choices[0].finish_reason
|
stop_reason = chat_completion.choices[0].finish_reason
|
||||||
@@ -118,6 +122,7 @@ async def test_chat_completion_with_tools(
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
tools=[WEATHER_TOOL],
|
tools=[WEATHER_TOOL],
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from .utils import (
|
|||||||
MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
|
MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
|
||||||
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
|
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
|
||||||
SEARCH_TOOL,
|
SEARCH_TOOL,
|
||||||
|
SEED,
|
||||||
WEATHER_TOOL,
|
WEATHER_TOOL,
|
||||||
ServerConfig,
|
ServerConfig,
|
||||||
)
|
)
|
||||||
@@ -39,6 +40,7 @@ async def test_parallel_tool_calls(
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
@@ -76,6 +78,7 @@ async def test_parallel_tool_calls(
|
|||||||
max_completion_tokens=200,
|
max_completion_tokens=200,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -166,6 +169,7 @@ async def test_parallel_tool_calls_with_results(
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
@@ -184,6 +188,7 @@ async def test_parallel_tool_calls_with_results(
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -229,6 +234,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
parallel_tool_calls=False,
|
parallel_tool_calls=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -247,6 +253,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
|
|||||||
max_completion_tokens=200,
|
max_completion_tokens=200,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
parallel_tool_calls=False,
|
parallel_tool_calls=False,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from .utils import (
|
|||||||
MESSAGES_ASKING_FOR_TOOLS,
|
MESSAGES_ASKING_FOR_TOOLS,
|
||||||
MESSAGES_WITH_TOOL_RESPONSE,
|
MESSAGES_WITH_TOOL_RESPONSE,
|
||||||
SEARCH_TOOL,
|
SEARCH_TOOL,
|
||||||
|
SEED,
|
||||||
WEATHER_TOOL,
|
WEATHER_TOOL,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,6 +28,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
@@ -71,6 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
|
|||||||
max_completion_tokens=100,
|
max_completion_tokens=100,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -154,6 +157,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = chat_completion.choices[0]
|
choice = chat_completion.choices[0]
|
||||||
@@ -171,6 +175,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
|
|||||||
model=model_name,
|
model=model_name,
|
||||||
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
tools=[WEATHER_TOOL, SEARCH_TOOL],
|
||||||
logprobs=False,
|
logprobs=False,
|
||||||
|
seed=SEED,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ def ensure_system_prompt(
|
|||||||
|
|
||||||
# universal args for all models go here. also good if you need to test locally
|
# universal args for all models go here. also good if you need to test locally
|
||||||
# and change type or KV cache quantization or something.
|
# and change type or KV cache quantization or something.
|
||||||
|
SEED = 42
|
||||||
|
|
||||||
ARGS: list[str] = [
|
ARGS: list[str] = [
|
||||||
"--enable-auto-tool-choice",
|
"--enable-auto-tool-choice",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
|
|||||||
Reference in New Issue
Block a user