[ROCm][CI] Fix test_max_len.py for ROCm (#29916)

Signed-off-by: charlifu <charlifu@amd.com>
Signed-off-by: Charlie Fu <Charlie.Fu@amd.com>
This commit is contained in:
Charlie Fu
2025-12-08 15:58:30 -06:00
committed by GitHub
parent ae0f69b16a
commit 6af70e11a0
5 changed files with 15 additions and 8 deletions

View File

@@ -13,12 +13,15 @@ import pytest
import torch
from vllm import LLM
from vllm.platforms import current_platform
from vllm.v1.engine.llm_engine import LLMEngine
from ..conftest import HfRunner, VllmRunner
from ..models.utils import check_outputs_equal
from ..utils import multi_gpu_test
ATTN_BACKEND = ["ROCM_ATTN"] if current_platform.is_rocm() else ["FLASH_ATTN"]
MODELS = [
"hmellor/tiny-random-Gemma2ForCausalLM",
"meta-llama/Llama-3.2-1B-Instruct",
@@ -57,7 +60,7 @@ def _fix_prompt_embed_outputs(
@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("backend", ["FLASH_ATTN"])
+@pytest.mark.parametrize("backend", ATTN_BACKEND)
@pytest.mark.parametrize("max_tokens", [5])
@pytest.mark.parametrize("enforce_eager", [False])
@pytest.mark.parametrize("async_scheduling", [True, False])