[ROCm][CI] Fix flaky Cohere/OpenAI embedding parity test (#37616)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -10,7 +10,7 @@ import numpy as np
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
-from tests.utils import RemoteOpenAIServer
|
||||
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
|
||||
|
||||
MODEL_NAME = "BAAI/bge-base-en-v1.5"
|
||||
DTYPE = "bfloat16"
|
||||
@@ -28,7 +28,7 @@ def server():
|
||||
"512",
|
||||
"--gpu-memory-utilization",
|
||||
"0.02",
|
||||
-]
|
||||
+] + ROCM_EXTRA_ARGS
|
||||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import pytest
|
||||
from tests.conftest import HfRunner
|
||||
from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
|
||||
from tests.models.utils import EmbedModelInfo
|
||||
-from tests.utils import RemoteOpenAIServer
|
||||
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
|
||||
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
@@ -49,7 +49,7 @@ def server(model_info, dtype: str):
|
||||
"--enforce-eager",
|
||||
"--max-model-len",
|
||||
"512",
|
||||
-]
|
||||
+] + ROCM_EXTRA_ARGS
|
||||
|
||||
if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5":
|
||||
# Manually enable Matryoshka Embeddings
|
||||
|
||||
@@ -118,6 +118,7 @@ class PoolingServing:
|
||||
)
|
||||
|
||||
pooling_params = self.io_processor.create_pooling_params(ctx.request)
|
||||
pooling_params.verify(self.model_config)
|
||||
|
||||
for i, engine_prompt in enumerate(ctx.engine_prompts):
|
||||
prompt_request_id = (
|
||||
|
||||
@@ -309,6 +309,9 @@ def create_error_response(
|
||||
|
||||
if isinstance(message, Exception):
|
||||
exc = message
|
||||
+logger.debug(
|
||||
+"create_error_response called with %s: %s", type(exc).__name__, exc
|
||||
+)
|
||||
|
||||
from vllm.exceptions import VLLMNotFoundError, VLLMValidationError
|
||||
|
||||
|
||||
Reference in New Issue
Block a user