[Benchmarks] Fix attention benchmark smoke test (#34269)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -22,9 +22,10 @@ steps:
       device: b200
       num_gpus: 2
       optional: true
       working_dir: "/vllm-workspace/"
       timeout_in_minutes: 10
       source_file_dependencies:
        - benchmarks/attention_benchmarks/
        - vllm/v1/attention/
       commands:
-       - python benchmarks/attention_benchmarks/benchmark.py --backends flash flashinfer --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1
+       - python3 benchmarks/attention_benchmarks/benchmark.py --backends flash flashinfer --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1
|
||||
Reference in New Issue
Block a user