[ROCm][CI] Prep Tests For Change To ROCM_ATTN As New Default Backend On ROCm (#36025)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
This commit is contained in:
@@ -15,6 +15,7 @@ from vllm.model_executor.model_loader.tensorizer import (
     tensorize_lora_adapter,
     tensorize_vllm_model,
 )
+from vllm.platforms import current_platform
 
 from ...utils import RemoteOpenAIServer
 
@@ -74,6 +75,8 @@ def server(model_uri, tensorize_model_and_lora):
         MODEL_NAME,
         "--enable-lora",
     ]
+    if current_platform.is_rocm():
+        args += ["--attention-backend", "TRITON_ATTN"]
 
     model_dir = os.path.dirname(model_uri)
     with RemoteOpenAIServer(model_dir, args) as remote_server:
Reference in New Issue
Block a user