[CI] Split V1 e2e + engine (1 GPU) into separate jobs (#36945)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Kevin H. Luu
2026-03-13 14:16:02 -07:00
committed by GitHub
parent 0005d2a3c9
commit f1816fb192
18 changed files with 81 additions and 39 deletions

View File

@@ -0,0 +1,40 @@
# Test-area definition for the "Spec Decode" group; the individual jobs are the entries under `steps`.
group: Spec Decode
# NOTE(review): presumably every step in this group waits for the image-build step - confirm against the pipeline generator.
depends_on:
- image-build
# Each list entry below is one job: a label, a timeout, the source paths it depends on, and its commands.
steps:
# Eagle job: runs the spec-decode e2e tests selected by -k "eagle_correctness".
- label: Spec Decode Eagle
  timeout_in_minutes: 30
  # NOTE(review): presumably the job is only scheduled when one of these paths changes - confirm.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  # The test path has no tests/ prefix; assumes the job runs from the tests directory - TODO confirm.
  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
# Speculators + MTP job: runs the spec-decode e2e tests selected by -k "speculators or mtp_correctness".
- label: Spec Decode Speculators + MTP
  timeout_in_minutes: 30
  # Same paths as the other Spec Decode steps, plus the speculators config directory.
  # NOTE(review): presumably the job is only scheduled when one of these paths changes - confirm.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - vllm/transformers_utils/configs/speculators/
  - tests/v1/e2e/spec_decode/
  commands:
  # The test path has no tests/ prefix; assumes the job runs from the tests directory - TODO confirm.
  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
# Ngram + Suffix job: runs the spec-decode e2e tests selected by -k "ngram or suffix".
- label: Spec Decode Ngram + Suffix
  timeout_in_minutes: 30
  # NOTE(review): presumably the job is only scheduled when one of these paths changes - confirm.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  # NOTE(review): -k matches substrings of test names/ids, so any test in this directory whose id
  # contains "ngram" or "suffix" is selected - confirm this does not overlap with the other steps.
  - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"
# Draft-model job: runs the spec-decode e2e tests selected by
# -k "draft_model or no_sync or batch_inference".
- label: Spec Decode Draft Model
  timeout_in_minutes: 30
  # NOTE(review): presumably the job is only scheduled when one of these paths changes - confirm.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  # The test path has no tests/ prefix; assumes the job runs from the tests directory - TODO confirm.
  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"