Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
Louie Tsai
2025-07-02 18:50:25 -06:00
committed by GitHub
parent 059d4cdb49
commit 9965c47d0d
9 changed files with 494 additions and 44 deletions

View File

@@ -0,0 +1,30 @@
[
{
"test_name": "latency_llama8B_tp1",
"environment_variables": {
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
"VLLM_CPU_KVCACHE_SPACE": 40
},
"parameters": {
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tensor_parallel_size": 1,
"load_format": "dummy",
"num_iters_warmup": 5,
"num_iters": 15
}
},
{
"test_name": "latency_llama8B_tp4",
"environment_variables": {
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
"VLLM_CPU_KVCACHE_SPACE": 40
},
"parameters": {
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tensor_parallel_size": 4,
"load_format": "dummy",
"num_iters_warmup": 5,
"num_iters": 15
}
}
]