[Benchmark] Improvements to attention benchmark script (#37115)

Signed-off-by: wzhao18 <wzhao18.sz@gmail.com>
This commit is contained in:
Wei Zhao
2026-03-16 18:22:40 -04:00
committed by GitHub
parent e5b807607c
commit a3a51d20e7
6 changed files with 311 additions and 68 deletions

View File

@@ -213,6 +213,9 @@ class BenchmarkConfig:
profile_memory: bool = False
use_cuda_graphs: bool = False
# "auto" or "fp8"
kv_cache_dtype: str = "auto"
# MLA-specific
prefill_backend: str | None = None
kv_lora_rank: int | None = None
@@ -369,6 +372,7 @@ class ResultsFormatter:
"backend",
"batch_spec",
"num_layers",
"kv_cache_dtype",
"mean_time",
"std_time",
"throughput",
@@ -382,6 +386,7 @@ class ResultsFormatter:
"backend": r.config.backend,
"batch_spec": r.config.batch_spec,
"num_layers": r.config.num_layers,
"kv_cache_dtype": r.config.kv_cache_dtype,
"mean_time": r.mean_time,
"std_time": r.std_time,
"throughput": r.throughput_tokens_per_sec or 0,