[CI] Add mteb testing for rerank models (#19344)

2025-06-16 16:36:43 +08:00
parent 26bc46ef89
commit f40f763f12
15 changed files with 418 additions and 246 deletions
--- a/tests/models/language/pooling/test_jina.py
+++ b/tests/models/language/pooling/test_jina.py
@@ -6,28 +6,10 @@ import pytest

 from vllm import PoolingParams

-from .embed_utils import (EmbedModelInfo, check_embeddings_close,
+from ...utils import EmbedModelInfo, RerankModelInfo
+from .embed_utils import (check_embeddings_close,
                          correctness_test_embed_models, matryoshka_fy)
-from .mteb_utils import mteb_test_embed_models
-
-SCORING_MODELS = [
-    "jinaai/jina-reranker-v2-base-multilingual",  # Roberta
-]
-
-TEXTS_1 = ["Organic skincare products for sensitive skin"]
-
-TEXTS_2 = [
-    "Organic skincare for sensitive skin with aloe vera and chamomile.",
-    "New makeup trends focus on bold colors and innovative techniques",
-    "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille",
-    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",  # noqa: E501
-    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",  # noqa: E501
-    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",  # noqa: E501
-    "针对敏感肌专门设计的天然有机护肤产品",
-    "新的化妆趋势注重鲜艳的颜色和创新的技巧",
-    "敏感肌のために特別に設計された天然有機スキンケア製品",
-    "新しいメイクのトレンドは鮮やかな色と革新的な技術に焦点を当てています",
-]
+from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models

 EMBEDDING_MODELS = [
    EmbedModelInfo("jinaai/jina-embeddings-v3",
@@ -35,47 +17,13 @@ EMBEDDING_MODELS = [
                   is_matryoshka=True)
 ]

-
-@pytest.fixture(scope="module", params=SCORING_MODELS)
-def model_name(request):
-    yield request.param
-
-
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
-
-    text_pair = [TEXTS_1[0], TEXTS_2[0]]
-
-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
-        hf_outputs = hf_model.predict([text_pair]).tolist()
-
-    with vllm_runner(model_name, task="score", dtype=dtype,
-                     max_model_len=None) as vllm_model:
-        vllm_outputs = vllm_model.score(text_pair[0], text_pair[1])
-
-    assert len(vllm_outputs) == 1
-    assert len(hf_outputs) == 1
-
-    assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.01)
-
-
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
-
-    text_pairs = [[TEXTS_1[0], text] for text in TEXTS_2]
-
-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
-        hf_outputs = hf_model.predict(text_pairs).tolist()
-
-    with vllm_runner(model_name, task="score", dtype=dtype,
-                     max_model_len=None) as vllm_model:
-        vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2)
-
-    assert len(vllm_outputs) == 10
-    assert len(hf_outputs) == 10
-
-    assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.01)
-    assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.01)
+RERANK_MODELS = [
+    RerankModelInfo(
+        "jinaai/jina-reranker-v2-base-multilingual",
+        architecture="XLMRobertaForSequenceClassification",
+        dtype="float32",
+    )
+]


@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
@@ -106,6 +54,12 @@ def test_embed_models_correctness(hf_runner, vllm_runner,
                                  hf_model_callback=hf_model_callback)


+@pytest.mark.parametrize("model_info", RERANK_MODELS)
+def test_rerank_models_mteb(hf_runner, vllm_runner,
+                            model_info: RerankModelInfo) -> None:
+    mteb_test_rerank_models(hf_runner, vllm_runner, model_info)
+
+
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dimensions", [16, 32])