Add vllm bench [latency, throughput] CLI commands (#16508)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-04-15 00:10:35 -06:00
committed by GitHub
parent bc5dd4f669
commit b4fe16c75b
11 changed files with 1771 additions and 2 deletions

View File

@@ -0,0 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
import subprocess
import pytest
# Small instruct model used as a lightweight target for the CLI smoke tests.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
@pytest.mark.benchmark
def test_bench_throughput():
    """Smoke-test the ``vllm bench throughput`` CLI command.

    Runs the benchmark end-to-end as a subprocess with a minimal workload
    (dummy weights, 32 input tokens / 1 output token, eager mode) and
    asserts the process exits successfully.
    """
    command = [
        "vllm", "bench", "throughput", "--model", MODEL_NAME, "--input-len",
        "32", "--output-len", "1", "--enforce-eager", "--load-format", "dummy"
    ]
    # Bound the run so a hung benchmark raises TimeoutExpired and fails
    # the test instead of stalling CI indefinitely.
    result = subprocess.run(command,
                            capture_output=True,
                            text=True,
                            timeout=1800)
    # Echo the benchmark's own output so it shows up in the pytest report.
    print(result.stdout)
    print(result.stderr)
    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"