[CI] improve embed testing (#18747)

This commit is contained in:
wang.yuqi
2025-05-28 15:16:35 +08:00
committed by GitHub
parent 0c492b7824
commit de65fc8e1e
13 changed files with 248 additions and 178 deletions

View File

@@ -1,9 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
from functools import partial
import pytest
from vllm import PoolingParams
from ...utils import check_embeddings_close, matryoshka_fy
from .embed_utils import (EmbedModelInfo, check_embeddings_close,
correctness_test_embed_models, matryoshka_fy)
from .mteb_utils import mteb_test_embed_models
SCORING_MODELS = [
"jinaai/jina-reranker-v2-base-multilingual", # Roberta
@@ -25,16 +29,10 @@ TEXTS_2 = [
]
EMBEDDING_MODELS = [
"jinaai/jina-embeddings-v3",
]
EMBEDDING_PROMPTS = [
"Follow the white rabbit.", # English
"Sigue al conejo blanco.", # Spanish
"Suis le lapin blanc.", # French
"跟着白兔走。", # Chinese
"اتبع الأرنب الأبيض.", # Arabic
"Folge dem weißen Kaninchen.", # German
EmbedModelInfo("jinaai/jina-embeddings-v3",
architecture="XLMRobertaModel",
is_matryoshka=True,
dtype="float32")
]
@@ -80,73 +78,66 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.01)
@pytest.fixture(scope="module", params=EMBEDDING_MODELS)
def emb_model_name(request):
    """Module-scoped fixture yielding one ``EMBEDDING_MODELS`` entry per run.

    pytest instantiates the fixture once for every element of ``params``;
    the current element is exposed through ``request.param``.
    """
    current_model = request.param
    yield current_model
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner,
                           model_info: EmbedModelInfo) -> None:
    """Run ``mteb_test_embed_models`` for each entry of ``EMBEDDING_MODELS``.

    Args:
        hf_runner: Project fixture forwarded unchanged to the helper.
        vllm_runner: Project fixture forwarded unchanged to the helper.
        model_info: The parametrized model description under test.
    """

    def _pin_text_matching_task(model):
        # Rebind ``encode`` so every later call carries
        # ``task="text-matching"`` without the caller having to pass it.
        # NOTE(review): presumably the helper applies this to the HF
        # reference model -- confirm in mteb_utils.
        text_matching_encode = partial(model.encode, task="text-matching")
        model.encode = text_matching_encode

    mteb_test_embed_models(
        hf_runner,
        vllm_runner,
        model_info,
        hf_model_callback=_pin_text_matching_task,
    )
def test_is_matryoshka(vllm_runner, emb_model_name):
    """Assert that the loaded model's config has ``is_matryoshka`` set.

    Args:
        vllm_runner: Project fixture used as a context manager to load the
            model with ``task="embed"`` and ``max_model_len=None``.
        emb_model_name: Model identifier supplied by the fixture of the
            same name.
    """
    runner_ctx = vllm_runner(emb_model_name,
                             task="embed",
                             max_model_len=None)
    with runner_ctx as vllm_model:
        engine = vllm_model.model.llm_engine
        assert engine.model_config.is_matryoshka
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
def test_embed_models_correctness(hf_runner, vllm_runner,
                                  model_info: EmbedModelInfo,
                                  example_prompts) -> None:
    """Run ``correctness_test_embed_models`` for each ``EMBEDDING_MODELS`` entry.

    Args:
        hf_runner: Project fixture forwarded unchanged to the helper.
        vllm_runner: Project fixture forwarded unchanged to the helper.
        model_info: The parametrized model description under test.
        example_prompts: Prompt fixture forwarded unchanged to the helper.
    """

    def _pin_text_matching_task(model):
        # Rebind ``encode`` so every later call carries
        # ``task="text-matching"`` without the caller having to pass it.
        # NOTE(review): presumably the helper applies this to the HF
        # reference model -- confirm in embed_utils.
        text_matching_encode = partial(model.encode, task="text-matching")
        model.encode = text_matching_encode

    correctness_test_embed_models(
        hf_runner,
        vllm_runner,
        model_info,
        example_prompts,
        hf_model_callback=_pin_text_matching_task,
    )
@pytest.mark.parametrize("model", EMBEDDING_MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_embeddings(
    hf_runner,
    vllm_runner,
    model,
    dtype: str,
    monkeypatch,
) -> None:
    """Compare HF and vLLM embeddings of ``EMBEDDING_PROMPTS`` for ``model``.

    Args:
        hf_runner: Project fixture; used as a context manager to load the
            reference model with ``is_sentence_transformer=True``.
        vllm_runner: Project fixture; used as a context manager to load the
            model with ``task="embed"`` and ``max_model_len=None``.
        model: Parametrized entry of ``EMBEDDING_MODELS``.
        dtype: Dtype string passed to both runners.
        monkeypatch: Requested pytest fixture; not referenced in the body.
    """
    prompts = EMBEDDING_PROMPTS

    # Reference side: the HF encode call is given the task explicitly.
    hf_ctx = hf_runner(model, dtype=dtype, is_sentence_transformer=True)
    with hf_ctx as hf_model:
        reference_embeddings = hf_model.encode(prompts, task="text-matching")

    # Candidate side: vLLM encodes the same prompts with no task argument.
    vllm_ctx = vllm_runner(model,
                           task="embed",
                           dtype=dtype,
                           max_model_len=None)
    with vllm_ctx as vllm_model:
        candidate_embeddings = vllm_model.encode(prompts)

    comparison_kwargs = dict(
        embeddings_0_lst=reference_embeddings,
        embeddings_1_lst=candidate_embeddings,
        name_0="hf",
        name_1="vllm",
        tol=1e-2,
    )
    check_embeddings_close(**comparison_kwargs)
@pytest.mark.parametrize("model", EMBEDDING_MODELS)
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dimensions", [16, 32])
def test_matryoshka(
hf_runner,
vllm_runner,
model,
model_info,
dtype: str,
dimensions: int,
example_prompts,
monkeypatch,
) -> None:
if not model_info.is_matryoshka:
pytest.skip("Model is not matryoshka")
example_prompts = EMBEDDING_PROMPTS
# ST will strip the input texts, see test_embedding.py
example_prompts = [str(s).strip() for s in example_prompts]
with hf_runner(
model,
model_info.name,
dtype=dtype,
is_sentence_transformer=True,
) as hf_model:
hf_outputs = hf_model.encode(example_prompts, task="text-matching")
hf_outputs = matryoshka_fy(hf_outputs, dimensions)
with vllm_runner(model, task="embed", dtype=dtype,
with vllm_runner(model_info.name,
task="embed",
dtype=dtype,
max_model_len=None) as vllm_model:
assert vllm_model.model.llm_engine.model_config.is_matryoshka
matryoshka_dimensions = (
vllm_model.model.llm_engine.model_config.matryoshka_dimensions)
assert matryoshka_dimensions is not None