[UX] Make vllm bench serve discover model by default and use --input-len (#30816)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-12-17 04:55:30 -05:00
committed by GitHub
parent a100152288
commit 519ef9a911
2 changed files with 79 additions and 13 deletions

View File

@@ -19,21 +19,18 @@ def server():
@pytest.mark.benchmark
def test_bench_serve(server):
# Test default model detection and input/output len
command = [
"vllm",
"bench",
"serve",
"--model",
MODEL_NAME,
"--host",
server.host,
"--port",
str(server.port),
"--dataset-name",
"random",
"--random-input-len",
"--input-len",
"32",
"--random-output-len",
"--output-len",
"4",
"--num-prompts",
"5",