# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
import openai
import pytest
from tests.conftest import HfRunner
from tests.models.utils import EmbedModelInfo, check_embeddings_close, matryoshka_fy
def run_embedding_correctness_test(
    hf_model: "HfRunner",
    inputs: list[str],
    vllm_outputs: Sequence[list[float]],
    dimensions: int | None = None,
):
    """Check vLLM embeddings against a sentence-transformers reference.

    Encodes ``inputs`` with the HF reference model, optionally truncates the
    reference vectors to ``dimensions`` (matryoshka-style), and asserts that
    the two sets of embeddings agree within a 1e-2 tolerance.
    """
    reference = hf_model.encode(inputs)
    if dimensions:
        # Matryoshka models: compare only the leading `dimensions` components.
        reference = matryoshka_fy(reference, dimensions)

    check_embeddings_close(
        embeddings_0_lst=reference,
        embeddings_1_lst=vllm_outputs,
        name_0="hf",
        name_1="vllm",
        tol=1e-2,
    )
def correctness_test_embed_models(
    hf_runner,
    vllm_runner,
    model_info: EmbedModelInfo,
    example_prompts,
    vllm_extra_kwargs=None,
    hf_model_callback=None,
):
    """Compare vLLM pooling-runner embeddings against an HF reference model.

    Args:
        hf_runner: Context-manager factory for the HF (sentence-transformers)
            reference model.
        vllm_runner: Context-manager factory for the vLLM model under test.
        model_info: Name, dtype, and optional HF overrides for both models.
        example_prompts: Prompts to embed with both backends.
        vllm_extra_kwargs: Extra keyword args forwarded to ``vllm_runner``;
            the caller's dict is never mutated.
        hf_model_callback: Optional hook invoked with the HF model before the
            comparison runs.
    """
    pytest.skip("Debug only, ci prefers to use mteb test.")

    # The example_prompts has ending "\n", for example:
    # "Write a short story about a robot that dreams for the first time.\n"
    # sentence_transformers will strip the input texts, see:
    # https://github.com/UKPLab/sentence-transformers/blob/v3.1.1/sentence_transformers/models/Transformer.py#L159
    # This makes the input_ids different between hf_model and vllm_model.
    # So we need to strip the input texts to avoid test failing.
    example_prompts = [str(s).strip() for s in example_prompts]

    # Copy before adding keys so the caller's kwargs dict is not mutated
    # as a side effect of running the test.
    vllm_extra_kwargs = dict(vllm_extra_kwargs or {})
    vllm_extra_kwargs["dtype"] = model_info.dtype

    if model_info.hf_overrides is not None:
        vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides

    with vllm_runner(
        model_info.name, runner="pooling", max_model_len=None, **vllm_extra_kwargs
    ) as vllm_model:
        vllm_outputs = vllm_model.embed(example_prompts)

    with hf_runner(
        model_info.name,
        dtype=model_info.hf_dtype,
        is_sentence_transformer=True,
    ) as hf_model:
        if hf_model_callback is not None:
            hf_model_callback(hf_model)

        run_embedding_correctness_test(hf_model, example_prompts, vllm_outputs)
|
2026-01-22 12:52:57 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
async def run_client_embeddings(
    client: openai.AsyncOpenAI,
    model_name: str,
    queries: list[str],
    instruction: str = "",
) -> list[list[float]]:
    """Embed ``queries`` via the OpenAI-compatible embeddings endpoint.

    Each query is prefixed with ``instruction`` and all queries are sent as a
    single batched request; returns one embedding vector per query, in order.
    """
    prompts = [instruction + query for query in queries]
    response = await client.embeddings.create(
        model=model_name,
        input=prompts,
    )
    return [item.embedding for item in response.data]