[Benchmark] Don't default to temperature==0 in vllm bench serve (#32723)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -160,7 +160,6 @@ async def async_request_openai_completions(
|
||||
if request_func_input.model_name
|
||||
else request_func_input.model,
|
||||
"prompt": request_func_input.prompt,
|
||||
- "temperature": 0.0,
|
||||
"repetition_penalty": 1.0,
|
||||
"max_tokens": request_func_input.output_len,
|
||||
"logprobs": request_func_input.logprobs,
|
||||
@@ -294,7 +293,6 @@ async def async_request_openai_chat_completions(
|
||||
"messages": [
|
||||
{"role": "user", "content": content},
|
||||
],
|
||||
- "temperature": 0.0,
|
||||
"max_completion_tokens": request_func_input.output_len,
|
||||
"stream": True,
|
||||
"stream_options": {
|
||||
@@ -389,7 +387,6 @@ async def async_request_openai_audio(
|
||||
"model": request_func_input.model_name
|
||||
if request_func_input.model_name
|
||||
else request_func_input.model,
|
||||
- "temperature": 0.0,
|
||||
"max_completion_tokens": request_func_input.output_len,
|
||||
"stream": True,
|
||||
"language": "en",
|
||||
|
||||
@@ -1419,8 +1419,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
type=float,
|
||||
default=None,
|
||||
help="Temperature sampling parameter. Only has effect on "
|
||||
- "openai-compatible backends. If not specified, default to greedy "
|
||||
- "decoding (i.e. temperature==0.0).",
|
||||
+ "openai-compatible backends.",
|
||||
)
|
||||
sampling_group.add_argument(
|
||||
"--frequency-penalty",
|
||||
@@ -1634,7 +1633,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
|
||||
)
|
||||
|
||||
if "temperature" not in sampling_params:
|
||||
- sampling_params["temperature"] = 0.0 # Default to greedy decoding.
|
||||
+ print(
|
||||
+ "WARNING: vllm bench serve no longer sets temperature==0 (greedy) "
|
||||
+ "in requests by default. The default will be determined on the "
|
||||
+ "server side and can be model/API specific. "
|
||||
+ "For the old behavior, include --temperature=0."
|
||||
+ )
|
||||
|
||||
default_percentile_metrics = "ttft,tpot,itl"
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user