Files
vllm/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
Woosuk Kwon 71683ca6f6 [V0 Deprecation] Remove multi-step scheduling (#22138)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
2025-08-12 20:18:39 -07:00

21 lines
553 B
JSON

[
{
"test_name": "llama8B_tp1_genai_perf",
"qps_list": [4,8,16,32],
"common_parameters": {
"model": "meta-llama/Meta-Llama-3-8B-Instruct",
"tp": 1,
"port": 8000,
"num_prompts": 500,
"reuse_server": false
},
"vllm_server_parameters": {
"disable_log_stats": "",
"gpu_memory_utilization": 0.9,
"max_num_seqs": 512,
"dtype": "bfloat16"
},
"genai_perf_input_parameters": {
}
}
]