[ROCm][CI] Fix entrypoints tests and Python-only installation test on ROCm (#28979)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2025-12-24 00:42:30 -06:00
parent 8ee90c83f8
commit 0247a91e00
26 changed files with 432 additions and 116 deletions
--- a/tests/entrypoints/pooling/basic/test_encode.py
+++ b/tests/entrypoints/pooling/basic/test_encode.py
@@ -9,11 +9,6 @@ from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.platforms import current_platform

-if current_platform.is_rocm():
-    pytest.skip(
-        "Encoder self-attention is not implemented on ROCm.", allow_module_level=True
-    )
-
 MODEL_NAME = "intfloat/multilingual-e5-small"

 PROMPTS = [
@@ -35,6 +30,12 @@ TOKEN_IDS = [

@pytest.fixture(scope="module")
 def llm():
+    # On ROCm, select the FLEX_ATTENTION backend explicitly: it is the only
+    # attention backend that supports encoder-only models on that platform.
+    attention_config = None
+    if current_platform.is_rocm():
+        attention_config = {"backend": "FLEX_ATTENTION"}
+
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(
@@ -44,6 +45,7 @@ def llm():
        gpu_memory_utilization=0.75,
        enforce_eager=True,
        seed=0,
+        attention_config=attention_config,
    )

    yield weakref.proxy(llm)