[ROCm][CI] Fix flaky Cohere/OpenAI embedding parity test (#37616)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-25 05:55:51 -05:00
committed by GitHub
parent 9ac2fcafbb
commit f262a62aa1
4 changed files with 8 additions and 4 deletions

View File

@@ -10,7 +10,7 @@ import numpy as np
import pytest
import requests
-from tests.utils import RemoteOpenAIServer
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
MODEL_NAME = "BAAI/bge-base-en-v1.5"
DTYPE = "bfloat16"
@@ -28,7 +28,7 @@ def server():
"512",
"--gpu-memory-utilization",
"0.02",
-]
+] + ROCM_EXTRA_ARGS
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server

View File

@@ -10,7 +10,7 @@ import pytest
from tests.conftest import HfRunner
from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
from tests.models.utils import EmbedModelInfo
-from tests.utils import RemoteOpenAIServer
+from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
from vllm.platforms import current_platform
@@ -49,7 +49,7 @@ def server(model_info, dtype: str):
"--enforce-eager",
"--max-model-len",
"512",
-]
+] + ROCM_EXTRA_ARGS
if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5":
# Manually enable Matryoshka Embeddings