Support bge-m3 sparse embeddings and colbert embeddings (#14526)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Signed-off-by: Max de Bayser <maxdebayser@gmail.com>
This commit is contained in:
Maximilien de Bayser
2026-01-22 12:52:57 -03:00
committed by GitHub
parent 444e2e7e1f
commit ff365eea94
9 changed files with 393 additions and 19 deletions

View File

@@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import numpy as np
import openai
import pytest
from scipy.spatial.distance import cosine
@@ -9,6 +8,7 @@ from vllm import LLM, SamplingParams
from vllm.config import ModelConfig
from ....utils import RemoteOpenAIServer
from .embed_utils import run_client_embeddings
MODEL_NAME = "parasail-ai/GritLM-7B-vllm"
MAX_MODEL_LEN = 4000
@@ -55,18 +55,6 @@ def run_llm_encode(
return [output.outputs.embedding for output in outputs]
async def run_client_embeddings(
    client: openai.AsyncOpenAI,
    queries: list[str],
    instruction: str,
) -> list[list[float]]:
    """Embed every query, prefixed with *instruction*, via the OpenAI API.

    Sends a single batched ``embeddings.create`` request against the
    module-level ``MODEL_NAME`` and returns one embedding vector per query,
    in the same order as *queries*.
    """
    prompts = [instruction + query for query in queries]
    response = await client.embeddings.create(
        model=MODEL_NAME,
        input=prompts,
    )
    # The API returns one `data` entry per input prompt, order-preserving.
    return [item.embedding for item in response.data]
def gritlm_instruction(instruction):
return (
"<|user|>\n" + instruction + "\n<|embed|>\n" if instruction else "<|embed|>\n"
@@ -145,11 +133,13 @@ async def test_gritlm_api_server_embedding():
d_rep = await run_client_embeddings(
client_embedding,
MODEL_NAME,
documents,
d_instruction,
)
q_rep = await run_client_embeddings(
client_embedding,
MODEL_NAME,
queries,
q_instruction,
)