[ROCm][CI] Fix flaky Cohere/OpenAI embedding parity test (#37616)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-25 05:55:51 -05:00
committed by GitHub
parent 9ac2fcafbb
commit f262a62aa1
4 changed files with 8 additions and 4 deletions

View File

@@ -10,7 +10,7 @@ import numpy as np
import pytest
import requests
-from tests.utils import RemoteOpenAIServer
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
MODEL_NAME = "BAAI/bge-base-en-v1.5"
DTYPE = "bfloat16"
@@ -28,7 +28,7 @@ def server():
"512",
"--gpu-memory-utilization",
"0.02",
-]
+] + ROCM_EXTRA_ARGS
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server

View File

@@ -10,7 +10,7 @@ import pytest
from tests.conftest import HfRunner
from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
from tests.models.utils import EmbedModelInfo
-from tests.utils import RemoteOpenAIServer
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
from vllm.platforms import current_platform
@@ -49,7 +49,7 @@ def server(model_info, dtype: str):
"--enforce-eager",
"--max-model-len",
"512",
-]
+] + ROCM_EXTRA_ARGS
if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5":
# Manually enable Matryoshka Embeddings

View File

@@ -118,6 +118,7 @@ class PoolingServing:
)
pooling_params = self.io_processor.create_pooling_params(ctx.request)
pooling_params.verify(self.model_config)
for i, engine_prompt in enumerate(ctx.engine_prompts):
prompt_request_id = (

View File

@@ -309,6 +309,9 @@ def create_error_response(
if isinstance(message, Exception):
exc = message
+logger.debug(
+"create_error_response called with %s: %s", type(exc).__name__, exc
+)
from vllm.exceptions import VLLMNotFoundError, VLLMValidationError