Add nightly b200 test for spec decode eagle correctness (#38577)
Signed-off-by: Rishi Puri <riship@nvidia.com>
This commit is contained in:
@@ -12,6 +12,17 @@ steps:
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
|
||||
|
||||
- label: Spec Decode Eagle Nightly B200
|
||||
timeout_in_minutes: 30
|
||||
device: b200
|
||||
optional: true
|
||||
source_file_dependencies:
|
||||
- vllm/v1/spec_decode/
|
||||
- vllm/v1/worker/gpu/spec_decode/
|
||||
- tests/v1/e2e/spec_decode/
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
|
||||
|
||||
- label: Spec Decode Speculators + MTP
|
||||
timeout_in_minutes: 30
|
||||
device: h200_18gb
|
||||
@@ -23,6 +34,18 @@ steps:
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
|
||||
|
||||
- label: Spec Decode Speculators + MTP Nightly B200
|
||||
timeout_in_minutes: 30
|
||||
device: b200
|
||||
optional: true
|
||||
source_file_dependencies:
|
||||
- vllm/v1/spec_decode/
|
||||
- vllm/v1/worker/gpu/spec_decode/
|
||||
- vllm/transformers_utils/configs/speculators/
|
||||
- tests/v1/e2e/spec_decode/
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
|
||||
|
||||
- label: Spec Decode Ngram + Suffix
|
||||
timeout_in_minutes: 30
|
||||
device: h200_18gb
|
||||
@@ -42,3 +65,14 @@ steps:
|
||||
- tests/v1/e2e/spec_decode/
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
|
||||
|
||||
- label: Spec Decode Draft Model Nightly B200
|
||||
timeout_in_minutes: 30
|
||||
device: b200
|
||||
optional: true
|
||||
source_file_dependencies:
|
||||
- vllm/v1/spec_decode/
|
||||
- vllm/v1/worker/gpu/spec_decode/
|
||||
- tests/v1/e2e/spec_decode/
|
||||
commands:
|
||||
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
|
||||
|
||||
Reference in New Issue
Block a user