diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index 2abfa78d3..a8cb5cd86 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -16,7 +16,7 @@ steps: - pytest -v -s v1/sample - pytest -v -s v1/logits_processors - pytest -v -s v1/worker - - pytest -v -s v1/spec_decode + - pytest -v -s -m 'not slow_test' v1/spec_decode - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit - pytest -v -s -m 'not cpu_test' v1/metrics - pytest -v -s v1/test_oracle.py @@ -166,4 +166,18 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pip install pytest-timeout pytest-forked - pytest -v -s v1/determinism/test_batch_invariance.py - - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py \ No newline at end of file + - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py + +- label: Acceptance Length Test (Large Models) # optional + timeout_in_minutes: 25 + gpu: h100 + optional: true + num_gpus: 1 + working_dir: "/vllm-workspace/tests" + source_file_dependencies: + - vllm/v1/spec_decode/ + - vllm/model_executor/models/mlp_speculator.py + - tests/v1/spec_decode/test_acceptance_length.py + commands: + - export VLLM_ALLOW_INSECURE_SERIALIZATION=1 + - pytest -v -s v1/spec_decode/test_acceptance_length.py -m slow_test diff --git a/tests/v1/spec_decode/test_acceptance_length.py b/tests/v1/spec_decode/test_acceptance_length.py index 4f43c9247..8a6a72781 100644 --- a/tests/v1/spec_decode/test_acceptance_length.py +++ b/tests/v1/spec_decode/test_acceptance_length.py @@ -35,7 +35,7 @@ class Eagle3ModelConfig: id: str = "" # Backends that are incompatible with this model (will be skipped) excluded_backends: set[AttentionBackendEnum] = field(default_factory=set) - # Pytest marks for this configuration (e.g., pytest.mark.optional) + # Pytest marks for this configuration marks: list = field(default_factory=list) # Custom relative tolerance (defaults to DEFAULT_RTOL if None) rtol: float | None = None