[ROCm][CI] Extending attention backend coverage for Eagle spec decode tests (#35265)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-02-25 16:16:18 -06:00
parent c97234c08b
commit 9571e99945
4 changed files with 314 additions and 150 deletions
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -6,6 +6,7 @@ from typing import Any
 import pytest
 import torch._dynamo.config as dynamo_config

+from tests.utils import large_gpu_mark, single_gpu_only
 from vllm import SamplingParams
 from vllm.logprobs import Logprob
 from vllm.platforms import current_platform
@@ -36,6 +37,7 @@ default_params = dict(
 )


+@single_gpu_only
 def test_without_spec_decoding(
    sample_json_schema,
    monkeypatch: pytest.MonkeyPatch,
@@ -95,6 +97,8 @@ def test_without_spec_decoding(
    run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)


+@single_gpu_only
+@large_gpu_mark(min_gb=16)
 def test_with_spec_decoding(sample_json_schema, monkeypatch: pytest.MonkeyPatch):
    """Test consistency and acceptance rates with some different combos of
    preemption, executor, async scheduling, prefill chunking,