[CI] improve embed testing (#18747)

This commit is contained in:
wang.yuqi
2025-05-28 15:16:35 +08:00
committed by GitHub
parent 0c492b7824
commit de65fc8e1e
13 changed files with 248 additions and 178 deletions

View File

@@ -2,7 +2,8 @@
import pytest
from ...utils import EmbedModelInfo, run_embedding_correctness_test
from .embed_utils import EmbedModelInfo, correctness_test_embed_models
from .mteb_utils import mteb_test_embed_models
MODELS = [
EmbedModelInfo("nomic-ai/nomic-embed-text-v1",
@@ -13,6 +14,9 @@ MODELS = [
architecture="NomicBertModel",
dtype="float32",
enable_test=False),
EmbedModelInfo("nomic-ai/CodeRankEmbed",
architecture="NomicBertModel",
enable_test=False),
EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe",
architecture="NomicBertModel",
dtype="float32",
@@ -21,30 +25,14 @@ MODELS = [
@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner,
                           model_info: EmbedModelInfo) -> None:
    """Run the shared MTEB check once for each entry of ``MODELS``.

    The test body is a thin wrapper: it forwards the two runner fixtures
    and the parametrized ``model_info`` unchanged to
    ``mteb_test_embed_models``, which is imported at module level from
    ``.mteb_utils``.

    Args:
        hf_runner: Fixture forwarded as the first argument of the helper.
        vllm_runner: Fixture forwarded as the second argument of the helper.
        model_info: The ``EmbedModelInfo`` entry under test.
    """
    # NOTE(review): entries declared with ``enable_test=False`` are
    # presumably skipped inside the helper -- confirm in mteb_utils.
    mteb_test_embed_models(hf_runner, vllm_runner, model_info)
@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_correctness(hf_runner, vllm_runner,
                                  model_info: EmbedModelInfo,
                                  example_prompts) -> None:
    """Run the shared embedding correctness check for each ``MODELS`` entry.

    The test body is a thin wrapper: it forwards the runner fixtures, the
    parametrized ``model_info`` and the ``example_prompts`` fixture
    unchanged to ``correctness_test_embed_models``, which is imported at
    module level from ``.embed_utils``.

    Args:
        hf_runner: Fixture forwarded as the first argument of the helper.
        vllm_runner: Fixture forwarded as the second argument of the helper.
        model_info: The ``EmbedModelInfo`` entry under test.
        example_prompts: Prompt fixture forwarded as the last argument.
    """
    # NOTE(review): the inline implementation this replaces skipped models
    # with ``enable_test=False`` and stripped whitespace from every prompt
    # before encoding; presumably the helper now does both -- confirm in
    # embed_utils.
    correctness_test_embed_models(hf_runner, vllm_runner, model_info,
                                  example_prompts)