[ROCm][CI] Prep Tests For Change To ROCM_ATTN As New Default Backend On ROCm (#36025)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
This commit is contained in:
@@ -13,6 +13,7 @@ import pytest
|
||||
import torch
|
||||
|
||||
from vllm import LLM, SamplingParams
|
||||
+from vllm.platforms import current_platform
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="In V1, we reject tokens > max_seq_len")
|
||||
@@ -65,7 +66,8 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
|
||||
# Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
|
||||
# with 400 Client Error: Bad Request.
|
||||
m.setenv("HF_TOKEN", "")
|
||||
-llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
|
||||
+attn_backend = "TRITON_ATTN" if current_platform.is_rocm() else "auto"
|
||||
+llm = LLM(model="qwen/Qwen1.5-0.5B-Chat", attention_backend=attn_backend)
|
||||
|
||||
prompts = [
|
||||
"Hello, my name is",
|
||||
|
||||
Reference in New Issue
Block a user