Support bge-m3 sparse embeddings and colbert embeddings (#14526)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Signed-off-by: Max de Bayser <maxdebayser@gmail.com>
This commit is contained in:
Maximilien de Bayser
2026-01-22 12:52:57 -03:00
committed by GitHub
parent 444e2e7e1f
commit ff365eea94
9 changed files with 393 additions and 19 deletions

View File

@@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import numpy as np
import openai
import pytest
from scipy.spatial.distance import cosine
@@ -9,6 +8,7 @@ from vllm import LLM, SamplingParams
from vllm.config import ModelConfig
from ....utils import RemoteOpenAIServer
from .embed_utils import run_client_embeddings
MODEL_NAME = "parasail-ai/GritLM-7B-vllm"
MAX_MODEL_LEN = 4000
@@ -55,18 +55,6 @@ def run_llm_encode(
return [output.outputs.embedding for output in outputs]
async def run_client_embeddings(
    client: openai.AsyncOpenAI,
    queries: list[str],
    instruction: str,
) -> list[list[float]]:
    """Embed every query, prefixed with *instruction*, via the OpenAI API.

    Sends a single batched ``embeddings.create`` request against the
    module-level ``MODEL_NAME`` and returns one embedding vector per query,
    in the same order as *queries*.
    """
    prompts = [instruction + query for query in queries]
    response = await client.embeddings.create(
        model=MODEL_NAME,
        input=prompts,
    )
    # The API returns one `data` entry per input prompt, order-preserving.
    return [item.embedding for item in response.data]
def gritlm_instruction(instruction):
return (
"<|user|>\n" + instruction + "\n<|embed|>\n" if instruction else "<|embed|>\n"
@@ -145,11 +133,13 @@ async def test_gritlm_api_server_embedding():
d_rep = await run_client_embeddings(
client_embedding,
MODEL_NAME,
documents,
d_instruction,
)
q_rep = await run_client_embeddings(
client_embedding,
MODEL_NAME,
queries,
q_instruction,
)