feat: Add ColBERT late interaction model support (#33686)

Signed-off-by: Ilya Boytsov <ilyaboytsov1805@gmail.com>
Signed-off-by: Ilya Boytsov <boytsovpanamera@mail.ru>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
Ilya Boytsov
2026-02-05 01:05:13 +01:00
committed by GitHub
parent fa4e0fb028
commit 439afa4eea
13 changed files with 974 additions and 3 deletions

View File

@@ -49,6 +49,7 @@ from .interfaces import (
is_hybrid,
requires_raw_input_tokens,
supports_cross_encoding,
supports_late_interaction,
supports_mamba_prefix_caching,
supports_multimodal,
supports_multimodal_encoder_tp_data,
@@ -205,6 +206,7 @@ _EMBEDDING_MODELS = {
# [Text-only]
"BertModel": ("bert", "BertEmbeddingModel"),
"BertSpladeSparseEmbeddingModel": ("bert", "BertSpladeSparseEmbeddingModel"),
"HF_ColBERT": ("colbert", "ColBERTModel"),
"DeciLMForCausalLM": ("nemotron_nas", "DeciLMForCausalLM"),
"Gemma2Model": ("gemma2", "Gemma2ForCausalLM"),
"Gemma3TextModel": ("gemma3", "Gemma3Model"),
@@ -593,6 +595,7 @@ class _ModelInfo:
default_seq_pooling_type: SequencePoolingType
default_tok_pooling_type: TokenPoolingType
supports_cross_encoding: bool
supports_late_interaction: bool
supports_multimodal: bool
supports_multimodal_raw_input_only: bool
requires_raw_input_tokens: bool
@@ -616,6 +619,7 @@ class _ModelInfo:
default_tok_pooling_type=get_default_tok_pooling_type(model),
attn_type=get_attn_type(model),
supports_cross_encoding=supports_cross_encoding(model),
supports_late_interaction=supports_late_interaction(model),
supports_multimodal=supports_multimodal(model),
supports_multimodal_raw_input_only=supports_multimodal_raw_input_only(
model