Add nightly b200 test for spec decode eagle correctness (#38577)
Signed-off-by: Rishi Puri <riship@nvidia.com>
This commit is contained in:
@@ -12,6 +12,17 @@ steps:
|
|||||||
commands:
|
commands:
|
||||||
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
|
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
|
||||||
|
|
||||||
|
# Step: runs the spec-decode e2e tests selected by `-k "eagle_correctness"`
# with `device: b200` and a 30-minute timeout.
# NOTE(review): `optional: true` presumably keeps this step out of the default
# run so it only executes nightly / on request — confirm against the pipeline
# generator.
# NOTE(review): the pytest path has no `tests/` prefix while the dependency
# list uses `tests/v1/e2e/spec_decode/` — presumably commands run from the
# tests directory; confirm the working directory.
- label: Spec Decode Eagle Nightly B200
|
||||||
|
timeout_in_minutes: 30
|
||||||
|
device: b200
|
||||||
|
optional: true
|
||||||
|
source_file_dependencies:
|
||||||
|
- vllm/v1/spec_decode/
|
||||||
|
- vllm/v1/worker/gpu/spec_decode/
|
||||||
|
- tests/v1/e2e/spec_decode/
|
||||||
|
commands:
|
||||||
|
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
|
||||||
|
|
||||||
- label: Spec Decode Speculators + MTP
|
- label: Spec Decode Speculators + MTP
|
||||||
timeout_in_minutes: 30
|
timeout_in_minutes: 30
|
||||||
device: h200_18gb
|
device: h200_18gb
|
||||||
@@ -23,6 +34,18 @@ steps:
|
|||||||
commands:
|
commands:
|
||||||
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
|
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
|
||||||
|
|
||||||
|
# Step: runs the spec-decode e2e tests selected by
# `-k "speculators or mtp_correctness"` with `device: b200` and a 30-minute
# timeout. Besides the spec-decode sources and tests, the dependency list also
# names `vllm/transformers_utils/configs/speculators/`.
# NOTE(review): `optional: true` presumably keeps this step out of the default
# run so it only executes nightly / on request — confirm against the pipeline
# generator.
- label: Spec Decode Speculators + MTP Nightly B200
|
||||||
|
timeout_in_minutes: 30
|
||||||
|
device: b200
|
||||||
|
optional: true
|
||||||
|
source_file_dependencies:
|
||||||
|
- vllm/v1/spec_decode/
|
||||||
|
- vllm/v1/worker/gpu/spec_decode/
|
||||||
|
- vllm/transformers_utils/configs/speculators/
|
||||||
|
- tests/v1/e2e/spec_decode/
|
||||||
|
commands:
|
||||||
|
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
|
||||||
|
|
||||||
- label: Spec Decode Ngram + Suffix
|
- label: Spec Decode Ngram + Suffix
|
||||||
timeout_in_minutes: 30
|
timeout_in_minutes: 30
|
||||||
device: h200_18gb
|
device: h200_18gb
|
||||||
@@ -42,3 +65,14 @@ steps:
|
|||||||
- tests/v1/e2e/spec_decode/
|
- tests/v1/e2e/spec_decode/
|
||||||
commands:
|
commands:
|
||||||
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
|
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
|
||||||
|
|
||||||
|
# Step: runs the spec-decode e2e tests selected by
# `-k "draft_model or no_sync or batch_inference"` with `device: b200` and a
# 30-minute timeout.
# NOTE(review): the label says "Draft Model" but the `-k` filter also selects
# `no_sync` and `batch_inference` tests — confirm the label is meant to cover
# all three.
# NOTE(review): `optional: true` presumably keeps this step out of the default
# run so it only executes nightly / on request — confirm against the pipeline
# generator.
- label: Spec Decode Draft Model Nightly B200
|
||||||
|
timeout_in_minutes: 30
|
||||||
|
device: b200
|
||||||
|
optional: true
|
||||||
|
source_file_dependencies:
|
||||||
|
- vllm/v1/spec_decode/
|
||||||
|
- vllm/v1/worker/gpu/spec_decode/
|
||||||
|
- tests/v1/e2e/spec_decode/
|
||||||
|
commands:
|
||||||
|
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
|
||||||
|
|||||||
Reference in New Issue
Block a user