2025-04-18 23:11:57 +08:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-06-03 11:20:17 -07:00
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-04-18 23:11:57 +08:00
|
|
|
|
2025-05-11 15:59:43 +08:00
|
|
|
import pytest
|
2025-04-18 23:11:57 +08:00
|
|
|
|
2025-08-12 00:41:37 +08:00
|
|
|
from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo
|
|
|
|
|
from .embed_utils import correctness_test_embed_models
|
2025-05-28 15:16:35 +08:00
|
|
|
from .mteb_utils import mteb_test_embed_models
|
2025-04-18 23:11:57 +08:00
|
|
|
|
|
|
|
|
# Snowflake Arctic-Embed checkpoints used to parametrize the tests in this
# module. In this list, a reference ``mteb_score`` is supplied exactly for
# the entries that set ``enable_test=True``.
# NOTE(review): how ``enable_test`` and ``mteb_score`` are consumed is decided
# by the helper modules, not here -- confirm there before changing values.
MODELS = [
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-xs",
        is_matryoshka=False,
        architecture="BertModel",
        mteb_score=0.714927797,
        enable_test=True,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-s",
        is_matryoshka=False,
        architecture="BertModel",
        enable_test=False,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-m",
        is_matryoshka=False,
        architecture="BertModel",
        enable_test=False,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-m-long",
        is_matryoshka=False,
        architecture="NomicBertModel",
        mteb_score=0.681146831,
        enable_test=True,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-l",
        is_matryoshka=False,
        architecture="BertModel",
        enable_test=False,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-m-v1.5",
        is_matryoshka=True,
        architecture="BertModel",
        mteb_score=0.649088363,
        enable_test=True,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-l-v2.0",
        is_matryoshka=True,
        architecture="XLMRobertaModel",
        mteb_score=0.712258299,
        enable_test=True,
    ),
    CLSPoolingEmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-m-v2.0",
        is_matryoshka=True,
        architecture="GteModel",
        mteb_score=0.706622444,
        enable_test=True,
    ),
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(
    hf_runner,
    vllm_runner,
    model_info: EmbedModelInfo,
) -> None:
    """Run the shared MTEB embedding check once per entry in ``MODELS``.

    The two runner fixtures and the parametrized ``model_info`` are passed
    straight through to ``mteb_test_embed_models``; this test adds no logic
    of its own.
    """
    mteb_test_embed_models(
        hf_runner,
        vllm_runner,
        model_info,
    )
|
2025-04-18 23:11:57 +08:00
|
|
|
|
|
|
|
|
|
2025-05-11 15:59:43 +08:00
|
|
|
@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_correctness(
    hf_runner,
    vllm_runner,
    model_info: EmbedModelInfo,
    example_prompts,
) -> None:
    """Run the shared embedding correctness check once per entry in ``MODELS``.

    The runner fixtures, the parametrized ``model_info`` and the
    ``example_prompts`` fixture are passed unchanged to
    ``correctness_test_embed_models``.
    """
    correctness_test_embed_models(
        hf_runner,
        vllm_runner,
        model_info,
        example_prompts,
    )
|