group: Spec Decode
depends_on:
  - image-build
steps:
- label: Spec Decode Eagle
  timeout_in_minutes: 30
  device: h200_18gb
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"

- label: Spec Decode Eagle Nightly B200
  timeout_in_minutes: 30
  device: b200
  optional: true
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"

- label: Spec Decode Speculators + MTP
  timeout_in_minutes: 30
  device: h200_18gb
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - vllm/transformers_utils/configs/speculators/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"

- label: Spec Decode Speculators + MTP Nightly B200
  timeout_in_minutes: 30
  device: b200
  optional: true
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - vllm/transformers_utils/configs/speculators/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"

- label: Spec Decode Ngram + Suffix
  timeout_in_minutes: 30
  device: h200_18gb
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"

- label: Spec Decode Draft Model
  timeout_in_minutes: 30
  device: h200_18gb
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"

- label: Spec Decode Draft Model Nightly B200
  timeout_in_minutes: 30
  device: b200
  optional: true
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"