[Frontend] Support multimodal inputs for late-interaction scoring (ColQwen3) + NewModel: nvidia/nemotron-colembed (#34574)

Signed-off-by: craftsangjae <craftsangjae@gmail.com>
2026-02-21 13:01:40 +09:00
parent 11be2c74dc
commit 5719a4e4e6
10 changed files with 532 additions and 66 deletions
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -256,6 +256,7 @@ _EMBEDDING_MODELS = {
    "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
    "ColQwen3": ("colqwen3", "ColQwen3Model"),
    "OpsColQwen3Model": ("colqwen3", "ColQwen3Model"),
+    "Qwen3VLNemotronEmbedModel": ("colqwen3", "ColQwen3Model"),
    "SiglipModel": ("siglip", "SiglipEmbeddingModel"),
    # Technically Terratorch models work on images, both in
    # input and output. I am adding it here because it piggy-backs on embedding