# Benchmark test area: three CI steps that exercise the benchmark scripts.
# NOTE(review): key nesting was restored from a copy whose indentation had
# been lost; confirm it against the schema the pipeline generator expects.
group: Benchmarks
# Presumably the key of the step that must finish before these steps run
# (the container image build) - confirm against the pipeline generator.
depends_on:
  - image-build
steps:
  # Runs the benchmark driver script from the .buildkite directory.
  - label: Benchmarks
    timeout_in_minutes: 20
    working_dir: "/vllm-workspace/.buildkite"
    # Presumably the paths whose changes cause this step to be scheduled;
    # verify against the pipeline generator.
    source_file_dependencies:
      - benchmarks/
    commands:
      - bash scripts/run-benchmarks.sh

  # Runs the pytest suite under benchmarks/.
  # NOTE(review): no working_dir is set, so the pytest path is resolved
  # against the default working directory, which is not visible here.
  # Presumably it resolves to tests/benchmarks/ (listed below) - confirm.
  - label: Benchmarks CLI Test
    timeout_in_minutes: 20
    source_file_dependencies:
      - vllm/
      - tests/benchmarks/
    commands:
      - pytest -v -s benchmarks/

  # Smoke test: one warm-up iteration and one repeat of the attention
  # benchmark for the flash and flashinfer backends with a single batch spec.
  - label: Attention Benchmarks Smoke Test (B200)
    # Presumably selects the B200 agent pool and reserves two GPUs - confirm.
    device: b200
    num_gpus: 2
    # Presumably keeps the step out of the default run unless requested;
    # verify against the pipeline generator.
    optional: true
    working_dir: "/vllm-workspace/"
    timeout_in_minutes: 10
    source_file_dependencies:
      - benchmarks/attention_benchmarks/
      - vllm/v1/attention/
    commands:
      - python3 benchmarks/attention_benchmarks/benchmark.py --backends flash flashinfer --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1