[CI/Build][AMD] Enable Entrypoints Integration Test (Pooling) to run without error on ROCm (#29212)
Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.protocol import (
|
||||
EmbeddingResponse,
|
||||
PoolingResponse,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.utils.serial_utils import (
|
||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||
@@ -28,6 +29,11 @@ from vllm.utils.serial_utils import (
|
||||
decode_pooling_output,
|
||||
)
|
||||
|
||||
if current_platform.is_rocm():
|
||||
pytest.skip(
|
||||
"Encoder self-attention is not implemented on ROCm.", allow_module_level=True
|
||||
)
|
||||
|
||||
MODEL_NAME = "intfloat/multilingual-e5-small"
|
||||
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
||||
DTYPE = "bfloat16"
|
||||
|
||||
Reference in New Issue
Block a user