diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md
index 9bc402d23..9081b5e82 100644
--- a/docs/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -625,6 +625,46 @@ curl -s http://localhost:8000/rerank -H "Content-Type: application/json" -d '{
}'
```
+### ColQwen3.5 Multi-Modal Late Interaction Models
+
+ColQwen3.5 is based on [ColPali](https://arxiv.org/abs/2407.01449), extending ColBERT's late interaction approach to **multi-modal** inputs. It uses the Qwen3.5 hybrid backbone (linear + full attention) and produces per-token L2-normalized vectors for MaxSim scoring.
+
+| Architecture | Backbone | Example HF Models |
+| - | - | - |
+| `ColQwen3_5` | Qwen3.5 | `athrael-soju/colqwen3.5-4.5B-v3` |
+
+Start the server:
+
+```shell
+vllm serve athrael-soju/colqwen3.5-4.5B-v3 --max-model-len 4096
+```
+
+Then you can use the rerank endpoint:
+
+```shell
+curl -s http://localhost:8000/rerank -H "Content-Type: application/json" -d '{
+  "model": "athrael-soju/colqwen3.5-4.5B-v3",
+ "query": "What is machine learning?",
+ "documents": [
+ "Machine learning is a subset of artificial intelligence.",
+ "Python is a programming language.",
+ "Deep learning uses neural networks."
+ ]
+}'
+```
+
+Or the score endpoint:
+
+```shell
+curl -s http://localhost:8000/score -H "Content-Type: application/json" -d '{
+  "model": "athrael-soju/colqwen3.5-4.5B-v3",
+ "text_1": "What is the capital of France?",
+ "text_2": ["The capital of France is Paris.", "Python is a programming language."]
+}'
+```
+
+An example can be found here: [examples/pooling/score/colqwen3_5_rerank_online.py](../../examples/pooling/score/colqwen3_5_rerank_online.py)
+
### BAAI/bge-m3
The `BAAI/bge-m3` model comes with extra weights for sparse and colbert embeddings but unfortunately in its `config.json`
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 2141163df..dea60155a 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -834,6 +834,7 @@ The following table lists those that are tested in vLLM.
| `CLIPModel` | CLIP | T / I | `openai/clip-vit-base-patch32`, `openai/clip-vit-large-patch14`, etc. | | |
| `ColModernVBertForRetrieval` | ColModernVBERT | T / I | `ModernVBERT/colmodernvbert-merged` | | |
| `ColPaliForRetrieval` | ColPali | T / I | `vidore/colpali-v1.3-hf` | | |
+| `ColQwen3_5` | ColQwen3.5 | T + I + V | `athrael-soju/colqwen3.5-4.5B-v3` | | |
| `LlamaNemotronVLModel` | Llama Nemotron Embedding + SigLIP | T + I | `nvidia/llama-nemotron-embed-vl-1b-v2` | | |
| `LlavaNextForConditionalGeneration`C | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
| `Phi3VForCausalLM`C | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
diff --git a/examples/pooling/score/colqwen3_5_rerank_online.py b/examples/pooling/score/colqwen3_5_rerank_online.py
new file mode 100644
index 000000000..c64bcfc81
--- /dev/null
+++ b/examples/pooling/score/colqwen3_5_rerank_online.py
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Example of using ColQwen3.5 late interaction model for reranking.
+
+ColQwen3.5 is a multi-modal ColBERT-style model based on Qwen3.5.
+It produces per-token embeddings and uses MaxSim scoring for retrieval
+and reranking. Supports both text and image inputs.
+
+Start the server with:
+    vllm serve athrael-soju/colqwen3.5-4.5B-v3 --max-model-len 4096
+
+Then run this script:
+ python colqwen3_5_rerank_online.py
+"""
+
+import requests
+
+MODEL = "athrael-soju/colqwen3.5-4.5B-v3"
+BASE_URL = "http://127.0.0.1:8000"
+
+headers = {"accept": "application/json", "Content-Type": "application/json"}
+
+
+def rerank_text():
+ """Text-only reranking via /rerank endpoint."""
+ print("=" * 60)
+ print("1. Text reranking (/rerank)")
+ print("=" * 60)
+
+ data = {
+ "model": MODEL,
+ "query": "What is machine learning?",
+ "documents": [
+ "Machine learning is a subset of artificial intelligence.",
+ "Python is a programming language.",
+ "Deep learning uses neural networks for complex tasks.",
+ "The weather today is sunny.",
+ ],
+ }
+
+ response = requests.post(f"{BASE_URL}/rerank", headers=headers, json=data)
+
+ if response.status_code == 200:
+ result = response.json()
+ print("\n Ranked documents (most relevant first):")
+ for item in result["results"]:
+ doc_idx = item["index"]
+ score = item["relevance_score"]
+ print(f" [{score:.4f}] {data['documents'][doc_idx]}")
+ else:
+ print(f" Request failed: {response.status_code}")
+ print(f" {response.text[:300]}")
+
+
+def score_text():
+ """Text-only scoring via /score endpoint."""
+ print()
+ print("=" * 60)
+ print("2. Text scoring (/score)")
+ print("=" * 60)
+
+ query = "What is the capital of France?"
+ documents = [
+ "The capital of France is Paris.",
+ "Berlin is the capital of Germany.",
+ "Python is a programming language.",
+ ]
+
+ data = {
+ "model": MODEL,
+ "text_1": query,
+ "text_2": documents,
+ }
+
+ response = requests.post(f"{BASE_URL}/score", headers=headers, json=data)
+
+ if response.status_code == 200:
+ result = response.json()
+ print(f"\n Query: {query}\n")
+ for item in result["data"]:
+ idx = item["index"]
+ score = item["score"]
+ print(f" Doc {idx} (score={score:.4f}): {documents[idx]}")
+ else:
+ print(f" Request failed: {response.status_code}")
+ print(f" {response.text[:300]}")
+
+
+def score_text_top_n():
+ """Text reranking with top_n filtering via /rerank endpoint."""
+ print()
+ print("=" * 60)
+ print("3. Text reranking with top_n=2 (/rerank)")
+ print("=" * 60)
+
+ data = {
+ "model": MODEL,
+ "query": "What is the capital of France?",
+ "documents": [
+ "The capital of France is Paris.",
+ "Berlin is the capital of Germany.",
+ "Python is a programming language.",
+ "The Eiffel Tower is in Paris.",
+ ],
+ "top_n": 2,
+ }
+
+ response = requests.post(f"{BASE_URL}/rerank", headers=headers, json=data)
+
+ if response.status_code == 200:
+ result = response.json()
+ print(f"\n Top {data['top_n']} results:")
+ for item in result["results"]:
+ doc_idx = item["index"]
+ score = item["relevance_score"]
+ print(f" [{score:.4f}] {data['documents'][doc_idx]}")
+ else:
+ print(f" Request failed: {response.status_code}")
+ print(f" {response.text[:300]}")
+
+
+def main():
+ rerank_text()
+ score_text()
+ score_text_top_n()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/models/multimodal/pooling/test_colqwen3_5.py b/tests/models/multimodal/pooling/test_colqwen3_5.py
new file mode 100644
index 000000000..d5899b7a4
--- /dev/null
+++ b/tests/models/multimodal/pooling/test_colqwen3_5.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for ColQwen3.5 late interaction model for multi-modal retrieval.
+
+ColQwen3.5 is a multi-vector retrieval model based on Qwen3.5 backbone with
+ColBERT-style late interaction scoring (MaxSim). It produces per-token
+embeddings for both text and image inputs.
+"""
+
+import pytest
+import torch
+
+from ....conftest import VllmRunner
+
+MODELS = [
+ "athrael-soju/colqwen3.5-4.5B-v3",
+]
+
+EMBED_DIMS = {
+ "athrael-soju/colqwen3.5-4.5B-v3": 320,
+}
+
+TEXT_QUERIES = [
+ "What is the capital of France?",
+ "Describe the contents of the document.",
+]
+
+TEXT_DOCUMENTS = [
+ "The capital of France is Paris.",
+ "This document contains important financial data.",
+]
+
+DTYPE = "half"
+
+
+def _run_token_embed_test(
+ vllm_runner: type[VllmRunner],
+ model: str,
+ *,
+ dtype: str,
+) -> None:
+ """Verify per-token embedding shape and L2 normalization."""
+ with vllm_runner(
+ model,
+ runner="pooling",
+ dtype=dtype,
+ max_model_len=4096,
+ enforce_eager=True,
+ ) as vllm_model:
+ outputs = vllm_model.token_embed([TEXT_QUERIES[0]])
+
+ assert len(outputs) == 1
+ emb = torch.tensor(outputs[0])
+ # Token embeddings should be 2D: [num_tokens, embed_dim]
+ assert emb.dim() == 2
+ assert emb.shape[1] == EMBED_DIMS[model]
+ assert emb.shape[0] > 1
+
+ # Verify L2 normalization
+ norms = torch.norm(emb, p=2, dim=-1)
+ torch.testing.assert_close(
+ norms,
+ torch.ones_like(norms),
+ rtol=1e-2,
+ atol=1e-2,
+ )
+
+
+def _run_late_interaction_test(
+ vllm_runner: type[VllmRunner],
+ model: str,
+ *,
+ dtype: str,
+) -> None:
+ """Verify MaxSim scoring matches manual computation."""
+ from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+
+ with vllm_runner(
+ model,
+ runner="pooling",
+ dtype=dtype,
+ max_model_len=4096,
+ enforce_eager=True,
+ ) as vllm_model:
+ q_outputs = vllm_model.token_embed([TEXT_QUERIES[0]])
+ d_outputs = vllm_model.token_embed([TEXT_DOCUMENTS[0]])
+
+ q_emb = torch.tensor(q_outputs[0])
+ d_emb = torch.tensor(d_outputs[0])
+
+ manual_score = compute_maxsim_score(q_emb, d_emb).item()
+
+ vllm_scores = vllm_model.score(TEXT_QUERIES[0], TEXT_DOCUMENTS[0])
+
+ assert len(vllm_scores) == 1
+ assert vllm_scores[0] == pytest.approx(manual_score, rel=0.01)
+
+
+def _run_relevance_test(
+ vllm_runner: type[VllmRunner],
+ model: str,
+ *,
+ dtype: str,
+) -> None:
+ """Verify that relevant documents score higher than irrelevant ones."""
+ query = "What is machine learning?"
+ documents = [
+ "Machine learning is a subset of artificial intelligence.",
+ "The weather forecast shows rain tomorrow.",
+ "Deep learning uses neural networks for complex tasks.",
+ ]
+
+ with vllm_runner(
+ model,
+ runner="pooling",
+ dtype=dtype,
+ max_model_len=4096,
+ enforce_eager=True,
+ ) as vllm_model:
+ scores = vllm_model.score(query, documents)
+
+ assert len(scores) == 3
+ assert scores[0] > scores[1], "ML doc should score higher than weather doc"
+ assert scores[2] > scores[1], "DL doc should score higher than weather doc"
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", [DTYPE])
+def test_colqwen3_5_token_embed(
+ vllm_runner,
+ model: str,
+ dtype: str,
+) -> None:
+ _run_token_embed_test(vllm_runner, model, dtype=dtype)
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", [DTYPE])
+def test_colqwen3_5_late_interaction_scoring(
+ vllm_runner,
+ model: str,
+ dtype: str,
+) -> None:
+ _run_late_interaction_test(vllm_runner, model, dtype=dtype)
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", [DTYPE])
+def test_colqwen3_5_relevance_ordering(
+ vllm_runner,
+ model: str,
+ dtype: str,
+) -> None:
+ _run_relevance_test(vllm_runner, model, dtype=dtype)
diff --git a/tests/models/registry.py b/tests/models/registry.py
index fe5585f85..47551d7eb 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -639,6 +639,11 @@ _LATE_INTERACTION_EXAMPLE_MODELS = {
"OpsColQwen3Model": _HfExamplesInfo(
"OpenSearch-AI/Ops-Colqwen3-4B", trust_remote_code=True
),
+ "ColQwen3_5": _HfExamplesInfo(
+ "athrael-soju/colqwen3.5-4.5B-v3",
+ trust_remote_code=True,
+ max_model_len=4096,
+ ),
"Qwen3VLNemotronEmbedModel": _HfExamplesInfo(
"nvidia/nemotron-colembed-vl-4b-v2",
),
diff --git a/vllm/model_executor/models/colqwen3_5.py b/vllm/model_executor/models/colqwen3_5.py
new file mode 100644
index 000000000..5c28fb6d3
--- /dev/null
+++ b/vllm/model_executor/models/colqwen3_5.py
@@ -0,0 +1,246 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+ColQwen3.5 late interaction model for multi-modal retrieval and reranking.
+
+ColQwen3.5 extends Qwen3.5 with a ColBERT-style late interaction head,
+producing per-token embeddings for both text and image inputs. It uses
+MaxSim scoring for retrieval/reranking tasks.
+
+This model supports the "token_embed" pooling task and is designed for
+multi-vector retrieval of documents containing both text and images.
+
+Reference: https://arxiv.org/abs/2407.01449 (ColPali)
+Based on: Qwen3.5 backbone with custom text projection
+
+Target models:
+- athrael-soju/colqwen3.5-4.5B-v3
+"""
+
+from collections.abc import Iterable, Mapping
+
+import torch
+import torch.nn as nn
+from transformers.models.qwen3_vl import Qwen3VLProcessor
+
+from vllm.config import VllmConfig
+from vllm.model_executor.layers.pooler.tokwise import pooler_for_token_embed
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.multimodal import MULTIMODAL_REGISTRY
+
+from .interfaces import SupportsLateInteraction
+from .interfaces_base import default_pooling_type
+from .qwen2_vl import Qwen2VLMultiModalDataParser
+from .qwen3_5 import (
+ Qwen3_5ForConditionalGeneration,
+ Qwen3_5ProcessingInfo,
+)
+from .qwen3_vl import (
+ Qwen3VLDummyInputsBuilder,
+ Qwen3VLMultiModalProcessor,
+)
+from .utils import AutoWeightsLoader, WeightsMapper
+
+
+class ColQwen3_5ProcessingInfo(Qwen3_5ProcessingInfo):
+ """Processing info for ColQwen3.5 models.
+
+ ColQwen3.5 models use custom HuggingFace processors (e.g.
+ ColQwen3_5Processor) that are incompatible with vLLM's
+ Qwen3VLMultiModalProcessor. We override get_hf_config() and
+ get_hf_processor() to skip the strict type check and force the
+ standard Qwen3VLProcessor.
+ """
+
+ def get_hf_config(self):
+ return self.ctx.get_hf_config()
+
+ def get_hf_processor(self, **kwargs: object) -> Qwen3VLProcessor:
+ return self.ctx.get_hf_processor(
+ Qwen3VLProcessor,
+ use_fast=kwargs.pop("use_fast", True),
+ **kwargs,
+ )
+
+ @property
+ def _supports_video(self) -> bool:
+ """Check if the HF processor supports video inputs."""
+ return hasattr(self.get_hf_processor(), "video_processor")
+
+ def get_video_processor(self, **kwargs: object):
+ if not self._supports_video:
+ raise AttributeError(
+ f"The processor for {self.ctx.model_config.model} does not "
+ "support video inputs (no video_processor attribute)."
+ )
+ return self.get_hf_processor(**kwargs).video_processor # type: ignore[attr-defined]
+
+ def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+ limits: dict[str, int | None] = {"image": None}
+ if self._supports_video:
+ limits["video"] = None
+ return limits
+
+ def get_mm_max_tokens_per_item(
+ self,
+ seq_len: int,
+ mm_counts: Mapping[str, int],
+ ) -> Mapping[str, int]:
+ max_image_tokens = self.get_max_image_tokens()
+ result: dict[str, int] = {"image": max_image_tokens}
+ if self._supports_video:
+ max_video_tokens = self.get_max_video_tokens(seq_len, mm_counts)
+ result["video"] = max_video_tokens
+ return result
+
+ def get_data_parser(self):
+ hf_config = self.get_hf_config()
+ spatial_merge_size = hf_config.vision_config.spatial_merge_size
+ return Qwen2VLMultiModalDataParser(
+ spatial_merge_size,
+ video_needs_metadata=self._supports_video,
+ expected_hidden_size=self._get_expected_hidden_size(),
+ )
+
+
+@default_pooling_type(seq_pooling_type="CLS", tok_pooling_type="ALL")
+@MULTIMODAL_REGISTRY.register_processor(
+ Qwen3VLMultiModalProcessor,
+ info=ColQwen3_5ProcessingInfo,
+ dummy_inputs=Qwen3VLDummyInputsBuilder,
+)
+class ColQwen3_5Model(
+ Qwen3_5ForConditionalGeneration,
+ SupportsLateInteraction,
+):
+ """ColQwen3.5 late interaction model for multi-modal retrieval/reranking.
+
+ This model extends Qwen3_5ForConditionalGeneration with a ColBERT-style
+ linear projection layer for per-token embeddings. It supports:
+ - "token_embed" task: Per-token embeddings for late interaction scoring
+
+ The model produces per-token embeddings by:
+ 1. Running the Qwen3.5 backbone (vision + language) to get hidden states
+ 2. Projecting hidden states through a linear layer (hidden_size -> embed_dim)
+ 3. L2 normalization is handled by the pooler via PoolerNormalize
+
+ Attributes:
+ custom_text_proj: Linear projection from hidden_size to embed_dim
+ """
+
+ # Mark this as a pooling model so vLLM routes to pooler path
+ is_pooling_model = True
+
+ # Override hf_to_vllm_mapper to handle ColQwen3.5 weight naming.
+ # ColPali saves weights as "language_model.*" but vLLM's
+ # Qwen3_5ForCausalLM has them under "language_model.model.*".
+ # Visual weights ("visual.*") already match the vLLM module path.
+ hf_to_vllm_mapper = WeightsMapper(
+ orig_to_new_prefix={
+ "language_model.": "language_model.model.",
+ }
+ )
+
+ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+ super().__init__(vllm_config=vllm_config, prefix=prefix)
+
+ config = vllm_config.model_config.hf_config
+ head_dtype = vllm_config.model_config.head_dtype
+
+ hidden_size = getattr(config, "hidden_size", None)
+ if hidden_size is None and hasattr(config, "text_config"):
+ hidden_size = config.text_config.hidden_size
+ if hidden_size is None:
+ raise ValueError(
+ "Unable to determine text hidden size from config. "
+ "Expected 'hidden_size' or 'text_config.hidden_size'."
+ )
+
+ # (ColPali: dim, projection_dim, colbert_dim)
+ self.embed_dim: int = (
+ getattr(config, "embed_dim", None)
+ or getattr(config, "dims", None)
+ or getattr(config, "dim", None)
+ or getattr(config, "projection_dim", None)
+ or getattr(config, "colbert_dim", None)
+ or 128 # default from reference implementation
+ )
+
+ self.custom_text_proj = nn.Linear(
+ hidden_size,
+ self.embed_dim,
+ bias=False,
+ dtype=head_dtype,
+ )
+
+ pooler_config = vllm_config.model_config.pooler_config
+ assert pooler_config is not None
+ self.pooler = pooler_for_token_embed(
+ pooler_config,
+ projector=None,
+ )
+
+ def forward(
+ self,
+ input_ids: torch.Tensor | None,
+ positions: torch.Tensor,
+ intermediate_tensors=None,
+ inputs_embeds: torch.Tensor | None = None,
+ **kwargs: object,
+ ) -> torch.Tensor:
+ """Run forward pass producing per-token embeddings."""
+ hidden_states = super().forward(
+ input_ids=input_ids,
+ positions=positions,
+ intermediate_tensors=intermediate_tensors,
+ inputs_embeds=inputs_embeds,
+ **kwargs,
+ )
+
+ if not isinstance(hidden_states, torch.Tensor):
+ return hidden_states # type: ignore
+
+ proj_dtype = self.custom_text_proj.weight.dtype
+ if hidden_states.dtype != proj_dtype:
+ hidden_states = hidden_states.to(proj_dtype)
+
+ # Project to embedding dimension (normalization handled by pooler)
+ return self.custom_text_proj(hidden_states)
+
+ # Names used for the projection layer across different ColQwen3.5 variants
+ _PROJ_LAYER_NAMES = {
+ "custom_text_proj", # ColPali naming
+ "embedding_proj_layer", # Alternative naming
+ }
+
+ def _is_proj_weight(self, name: str) -> bool:
+ """Check if a weight name belongs to the projection layer."""
+ return any(proj_name in name for proj_name in self._PROJ_LAYER_NAMES)
+
+ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+ """Load weights with special handling for projection layer."""
+ weights_list = list(weights)
+ proj_weights: list[tuple[str, torch.Tensor]] = []
+ model_weights: list[tuple[str, torch.Tensor]] = []
+
+ for name, weight in weights_list:
+ if self._is_proj_weight(name):
+ proj_weights.append((name, weight))
+ else:
+ model_weights.append((name, weight))
+
+ loader = AutoWeightsLoader(
+ self,
+ skip_prefixes=["mtp."],
+ )
+ loaded = loader.load_weights(model_weights, mapper=self.hf_to_vllm_mapper)
+
+ for name, weight in proj_weights:
+ param_name = name.split(".")[-1]
+ param = getattr(self.custom_text_proj, param_name, None)
+ if param is not None:
+ weight = weight.to(device=param.device, dtype=param.dtype)
+ default_weight_loader(param, weight)
+ loaded.add(f"custom_text_proj.{param_name}")
+
+ return loaded
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 881963dbc..488cfa35c 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -647,6 +647,7 @@ class VoyageQwen3BidirectionalEmbedModelConfig(VerifyAndUpdateConfig):
MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"ColBERTJinaRobertaModel": JinaRobertaModelConfig,
+ "ColQwen3_5": Qwen3_5ForConditionalGenerationConfig,
"DeepseekV32ForCausalLM": DeepseekV32ForCausalLM,
"Ernie4_5_VLMoeForConditionalGeneration": Ernie4_5_VLMoeForConditionalGenerationConfig, # noqa: E501
"FalconMambaForCausalLM": MambaModelConfig,
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 7e83af3fd..1f05d14c6 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -274,8 +274,10 @@ _LATE_INTERACTION_MODELS = {
"ColBERTJinaRobertaModel": ("colbert", "ColBERTJinaRobertaModel"),
# [Multimodal]
"ColModernVBertForRetrieval": ("colmodernvbert", "ColModernVBertForRetrieval"),
+ "ColPaliForRetrieval": ("colpali", "ColPaliModel"),
"ColQwen3": ("colqwen3", "ColQwen3Model"),
"OpsColQwen3Model": ("colqwen3", "ColQwen3Model"),
+ "ColQwen3_5": ("colqwen3_5", "ColQwen3_5Model"),
"Qwen3VLNemotronEmbedModel": ("colqwen3", "ColQwen3Model"),
}