[CI] Add mteb testing for rerank models (#19344)

This commit is contained in:
wang.yuqi
2025-06-16 16:36:43 +08:00
committed by GitHub
parent 26bc46ef89
commit f40f763f12
15 changed files with 418 additions and 246 deletions

View File

@@ -7,34 +7,30 @@ import pytest
from tests.models.language.pooling.mteb_utils import (MTEB_EMBED_TASKS,
MTEB_EMBED_TOL,
OpenAIClientMtebEncoder,
run_mteb_embed_task,
run_mteb_embed_task_st)
run_mteb_embed_task)
from tests.utils import RemoteOpenAIServer
os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"
MODEL_NAME = "BAAI/bge-m3"
DTYPE = "float16"
MAIN_SCORE = 0.7873427091972599
MODEL_NAME = "intfloat/e5-small"
MAIN_SCORE = 0.7422994752439667
@pytest.fixture(scope="module")
def server():
args = [
"--task", "embed", "--dtype", DTYPE, "--enforce-eager",
"--max-model-len", "512"
"--task", "embed", "--enforce-eager", "--disable-uvicorn-access-log"
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server
def test_mteb(server):
def test_mteb_embed(server):
client = server.get_client()
encoder = OpenAIClientMtebEncoder(MODEL_NAME, client)
vllm_main_score = run_mteb_embed_task(encoder, MTEB_EMBED_TASKS)
st_main_score = MAIN_SCORE or run_mteb_embed_task_st(
MODEL_NAME, MTEB_EMBED_TASKS)
st_main_score = MAIN_SCORE
print("VLLM main score: ", vllm_main_score)
print("SentenceTransformer main score: ", st_main_score)

View File

@@ -0,0 +1,59 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import pytest
# yapf conflicts with isort for this block
# yapf: disable
from tests.models.language.pooling.mteb_utils import (MTEB_RERANK_LANGS,
MTEB_RERANK_TASKS,
MTEB_RERANK_TOL,
RerankClientMtebEncoder,
ScoreClientMtebEncoder,
run_mteb_rerank)
# yapf: enable
from tests.utils import RemoteOpenAIServer
# Set the logging-level variable in this process before any server is
# started (presumably consumed by vLLM - confirm against its env handling).
os.environ.update(VLLM_LOGGING_LEVEL="WARNING")

# Model served by the ``server`` fixture and passed to the client encoders.
MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# Reference main score that the scores measured through vLLM are compared
# against in the tests below.
MAIN_SCORE = 0.33437
@pytest.fixture(scope="module")
def server():
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``, shared per module.

    The server is entered as a context manager, so it is torn down when the
    module-scoped fixture is finalized.
    """
    cli_args = ["--task", "score"]
    cli_args += ["--enforce-eager", "--disable-uvicorn-access-log"]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
def test_mteb_score(server):
    """Compare the MTEB rerank main score via the ``score`` URL to MAIN_SCORE.

    Builds a ``ScoreClientMtebEncoder`` against ``server.url_for("score")``,
    runs ``run_mteb_rerank`` over ``MTEB_RERANK_TASKS`` /
    ``MTEB_RERANK_LANGS`` and requires the result to be within an absolute
    tolerance of ``MTEB_RERANK_TOL`` of the reference score.
    """
    score_url = server.url_for("score")
    client_encoder = ScoreClientMtebEncoder(MODEL_NAME, score_url)

    measured_score = run_mteb_rerank(
        client_encoder,
        MTEB_RERANK_TASKS,
        MTEB_RERANK_LANGS,
    )
    reference_score = MAIN_SCORE

    # Print both scores and their gap so a failing run can be diagnosed
    # from the captured output.
    print("VLLM main score: ", measured_score)
    print("SentenceTransformer main score: ", reference_score)
    print("Difference: ", reference_score - measured_score)

    expected = pytest.approx(measured_score, abs=MTEB_RERANK_TOL)
    assert reference_score == expected
def test_mteb_rerank(server):
    """Compare the MTEB rerank main score via the ``rerank`` URL to MAIN_SCORE.

    Builds a ``RerankClientMtebEncoder`` against ``server.url_for("rerank")``,
    runs ``run_mteb_rerank`` over ``MTEB_RERANK_TASKS`` /
    ``MTEB_RERANK_LANGS`` and requires the result to be within an absolute
    tolerance of ``MTEB_RERANK_TOL`` of the reference score.
    """
    rerank_url = server.url_for("rerank")
    client_encoder = RerankClientMtebEncoder(MODEL_NAME, rerank_url)

    measured_score = run_mteb_rerank(
        client_encoder,
        MTEB_RERANK_TASKS,
        MTEB_RERANK_LANGS,
    )
    reference_score = MAIN_SCORE

    # Print both scores and their gap so a failing run can be diagnosed
    # from the captured output.
    print("VLLM main score: ", measured_score)
    print("SentenceTransformer main score: ", reference_score)
    print("Difference: ", reference_score - measured_score)

    expected = pytest.approx(measured_score, abs=MTEB_RERANK_TOL)
    assert reference_score == expected