[CI/Build][AMD] Enable Entrypoints Integration Test (Pooling) to run without error on ROCm (#29212)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
rasmith
2025-11-21 20:13:18 -06:00
committed by GitHub
parent 052950e5b3
commit 6f403501a0
11 changed files with 66 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.protocol import (
EmbeddingResponse,
PoolingResponse,
)
from vllm.platforms import current_platform
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.utils.serial_utils import (
EMBED_DTYPE_TO_TORCH_DTYPE,
@@ -28,6 +29,11 @@ from vllm.utils.serial_utils import (
decode_pooling_output,
)
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
MODEL_NAME = "intfloat/multilingual-e5-small"
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
DTYPE = "bfloat16"

View File

@@ -12,6 +12,12 @@ from tests.models.language.pooling.embed_utils import run_embedding_correctness_
from tests.models.utils import EmbedModelInfo
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import EmbeddingResponse
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
MODELS = [
EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),

View File

@@ -16,6 +16,12 @@ import pytest_asyncio
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import EmbeddingResponse
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
def _generate_random_text(word_count: int) -> str:

View File

@@ -8,6 +8,12 @@ import torch.nn.functional as F
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import PoolingResponse, RerankResponse
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
MODEL_NAME = "BAAI/bge-reranker-base"
DTYPE = "bfloat16"

View File

@@ -10,6 +10,12 @@ from torch import tensor
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import ScoreResponse
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
MODELS = [
{"name": "BAAI/bge-reranker-v2-m3", "is_cross_encoder": True},

View File

@@ -7,6 +7,12 @@ import pytest
import pytest_asyncio
from tests.utils import RemoteOpenAIServer
from vllm.platforms import current_platform
if current_platform.is_rocm():
pytest.skip(
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
)
MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2"
max_model_len = 128