feat: Add ColBERT late interaction model support (#33686)
Signed-off-by: Ilya Boytsov <ilyaboytsov1805@gmail.com> Signed-off-by: Ilya Boytsov <boytsovpanamera@mail.ru> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
57
examples/pooling/score/colbert_rerank_online.py
Normal file
57
examples/pooling/score/colbert_rerank_online.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""
|
||||
Example of using ColBERT late interaction model for reranking.
|
||||
|
||||
ColBERT (Contextualized Late Interaction over BERT) uses per-token embeddings
|
||||
and MaxSim scoring for document reranking, providing better accuracy than
|
||||
single-vector models while being more efficient than cross-encoders.
|
||||
|
||||
Start the server with:
|
||||
vllm serve answerdotai/answerai-colbert-small-v1
|
||||
|
||||
Then run this script:
|
||||
python colbert_rerank_online.py
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import requests
|
||||
|
||||
# Rerank endpoint of a vLLM server listening on localhost port 8000.
url = "http://127.0.0.1:8000/rerank"

# The request body is sent as JSON and a JSON response is requested.
_JSON_MIME = "application/json"
headers = {
    "accept": _JSON_MIME,
    "Content-Type": _JSON_MIME,
}

# Candidate passages that get scored against the query.
_CANDIDATE_DOCUMENTS = [
    "Machine learning is a subset of artificial intelligence.",
    "Python is a programming language.",
    "Deep learning uses neural networks for complex tasks.",
    "The weather today is sunny.",
]

# Rerank payload: the model to use, one query, and the documents to score.
data = {
    "model": "answerdotai/answerai-colbert-small-v1",
    "query": "What is machine learning?",
    "documents": _CANDIDATE_DOCUMENTS,
}
|
||||
|
||||
|
||||
def main():
    """Send the rerank request and print the scored documents.

    Posts the module-level ``data`` payload to ``url`` with ``headers``.
    On HTTP 200 the JSON response is pretty-printed, followed by one line
    per entry of ``result["results"]`` (in the order the server returned
    them) showing its ``relevance_score`` and the matching document text.
    For any other status the status code and the raw response body are
    printed instead.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the timeout passed to ``requests.post``.
    """
    # Requests applies no timeout by default, so without this the example
    # would block forever if the server accepts the connection but never
    # answers.
    response = requests.post(url, headers=headers, json=data, timeout=60)

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        print(response.text)
        return

    print("ColBERT Rerank Request successful!")
    result = response.json()
    print(json.dumps(result, indent=2))

    # Show ranked results
    print("\nRanked documents (most relevant first):")
    for item in result["results"]:
        # "index" refers back to the position in the submitted document list.
        doc_idx = item["index"]
        score = item["relevance_score"]
        print(f" Score {score:.4f}: {data['documents'][doc_idx]}")
|
||||
|
||||
|
||||
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user