[Benchmark] Improvements to attention benchmark script (#37115)
Signed-off-by: wzhao18 <wzhao18.sz@gmail.com>
This commit is contained in:
@@ -213,6 +213,9 @@ class BenchmarkConfig:
|
||||
profile_memory: bool = False
|
||||
use_cuda_graphs: bool = False
|
||||
|
||||
# "auto" or "fp8"
|
||||
kv_cache_dtype: str = "auto"
|
||||
|
||||
# MLA-specific
|
||||
prefill_backend: str | None = None
|
||||
kv_lora_rank: int | None = None
|
||||
@@ -369,6 +372,7 @@ class ResultsFormatter:
|
||||
"backend",
|
||||
"batch_spec",
|
||||
"num_layers",
|
||||
"kv_cache_dtype",
|
||||
"mean_time",
|
||||
"std_time",
|
||||
"throughput",
|
||||
@@ -382,6 +386,7 @@ class ResultsFormatter:
|
||||
"backend": r.config.backend,
|
||||
"batch_spec": r.config.batch_spec,
|
||||
"num_layers": r.config.num_layers,
|
||||
"kv_cache_dtype": r.config.kv_cache_dtype,
|
||||
"mean_time": r.mean_time,
|
||||
"std_time": r.std_time,
|
||||
"throughput": r.throughput_tokens_per_sec or 0,
|
||||
|
||||
Reference in New Issue
Block a user