# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.models.language.pooling.embed_utils import correctness_test_embed_models
from tests.models.utils import EmbedModelInfo
from .mteb_embed_utils import mteb_test_embed_models
# Embedding models under test.  Each entry pins the expected MTEB score so
# that regressions in the vLLM implementation are caught by the test below.
MODELS = [
    EmbedModelInfo(
        "voyageai/voyage-4-nano",
        architecture="VoyageQwen3BidirectionalEmbedModel",
        enable_test=True,
        seq_pooling_type="MEAN",
        attn_type="encoder_only",
        is_prefix_caching_supported=False,
        is_chunked_prefill_supported=False,
        hf_overrides={
            "architectures": ["VoyageQwen3BidirectionalEmbedModel"],
            "num_labels": 2048,
        },
        # === MTEB Results ===
        # STS12: 0.6613
        # STS13: 0.6906
        # STS14: 0.6556
        # STS15: 0.7843
        # STS16: 0.7340
        # STSBenchmark: 0.7063
        # Average score: 0.7054
        mteb_score=0.7054,
    ),
]


@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo) -> None:
    """Run the shared MTEB benchmark check for each configured model."""
    # Encoder-only attention models need enforce_eager=True to avoid
    # CUDA graph capture issues with piecewise compilation.
    extra_kwargs = {"enforce_eager": True}
    mteb_test_embed_models(
        hf_runner, vllm_runner, model_info, vllm_extra_kwargs=extra_kwargs
    )


@pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_correctness(
    hf_runner, vllm_runner, model_info: EmbedModelInfo, example_prompts
) -> None:
    """Run the shared correctness check (vLLM vs. HF runner) on example prompts."""
    # enforce_eager=True avoids CUDA graph capture issues with piecewise
    # compilation for encoder-only attention models.
    extra_kwargs = {"enforce_eager": True}
    correctness_test_embed_models(
        hf_runner,
        vllm_runner,
        model_info,
        example_prompts,
        vllm_extra_kwargs=extra_kwargs,
    )