[ROCm][CI] Fix entrypoints tests and Python-only installation test on ROCm (#28979)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -14,11 +14,6 @@ from tests.utils import RemoteOpenAIServer
|
||||
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
if current_platform.is_rocm():
|
||||
pytest.skip(
|
||||
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
|
||||
)
|
||||
|
||||
MODELS = [
|
||||
EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
|
||||
EmbedModelInfo(
|
||||
@@ -62,6 +57,10 @@ def server(model_info, dtype: str):
|
||||
["--trust_remote_code", "--hf_overrides", '{"matryoshka_dimensions":[256]}']
|
||||
)
|
||||
|
||||
# ROCm: Use Flex Attention to support encoder-only self-attention.
|
||||
if current_platform.is_rocm():
|
||||
args.extend(["--attention-backend", "FLEX_ATTENTION"])
|
||||
|
||||
with RemoteOpenAIServer(model_info.name, args) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
Reference in New Issue
Block a user