[ROCm][CI] Fix entrypoints tests and Python-only installation test on ROCm (#28979)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2025-12-24 00:42:30 -06:00
committed by GitHub
parent 8ee90c83f8
commit 0247a91e00
26 changed files with 432 additions and 116 deletions

View File

@@ -18,11 +18,6 @@ from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
def _generate_random_text(word_count: int) -> str:
"""Generate random text with approximately the specified word count."""
@@ -228,6 +223,10 @@ def server_with_chunked_processing():
"0.8",
]
# ROCm: Use Flex Attention to support encoder-only self-attention.
if current_platform.is_rocm():
args.extend(["--attention-backend", "FLEX_ATTENTION"])
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server