[ROCm][CI] Fix test_max_len.py for ROCm (#29916)
Signed-off-by: charlifu <charlifu@amd.com>
Signed-off-by: Charlie Fu <Charlie.Fu@amd.com>
@@ -13,12 +13,15 @@ import pytest
 import torch
 
 from vllm import LLM
+from vllm.platforms import current_platform
 from vllm.v1.engine.llm_engine import LLMEngine
 
 from ..conftest import HfRunner, VllmRunner
 from ..models.utils import check_outputs_equal
 from ..utils import multi_gpu_test
 
+ATTN_BACKEND = ["ROCM_ATTN"] if current_platform.is_rocm() else ["FLASH_ATTN"]
+
 MODELS = [
     "hmellor/tiny-random-Gemma2ForCausalLM",
     "meta-llama/Llama-3.2-1B-Instruct",
@@ -57,7 +60,7 @@ def _fix_prompt_embed_outputs(
 
 
 @pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("backend", ["FLASH_ATTN"])
+@pytest.mark.parametrize("backend", ATTN_BACKEND)
 @pytest.mark.parametrize("max_tokens", [5])
 @pytest.mark.parametrize("enforce_eager", [False])
 @pytest.mark.parametrize("async_scheduling", [True, False])
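
For reference, below is a minimal sketch of how a backend list parametrized this way is commonly applied in vLLM's test suite. Only the ATTN_BACKEND definition comes from the diff above; the VLLM_ATTENTION_BACKEND environment override and the test name are assumptions about usage, not taken from test_max_len.py itself.

# Sketch only: mirrors the constant added by this commit and shows one
# common way a parametrized backend is applied in vLLM tests.
import pytest

from vllm.platforms import current_platform

# Taken from the diff above: pick the ROCm attention backend on AMD GPUs,
# FlashAttention elsewhere.
ATTN_BACKEND = ["ROCM_ATTN"] if current_platform.is_rocm() else ["FLASH_ATTN"]


@pytest.mark.parametrize("backend", ATTN_BACKEND)
def test_backend_selection(monkeypatch: pytest.MonkeyPatch, backend: str) -> None:
    # Assumption: force the backend via the VLLM_ATTENTION_BACKEND
    # environment variable, a common pattern in vLLM's test suite.
    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend)
    # ... run the model and compare outputs, as test_max_len.py does.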