[Model] Add nvidia/llama-nemotron-embed-vl-1b-v2 multimodal embedding model (#35297)

Signed-off-by: Jakub Zakrzewski <jzakrzewski@nvidia.com>
This commit is contained in:
Jakub Zakrzewski
2026-02-26 15:17:17 +01:00
committed by GitHub
parent 7fea7250a4
commit 111d869069
8 changed files with 545 additions and 31 deletions

View File

@@ -112,6 +112,42 @@ class LlamaBidirectionalConfig(VerifyAndUpdateConfig):
model_config.pooler_config.seq_pooling_type = pooling_type
class LlamaNemotronVLConfig(VerifyAndUpdateConfig):
    """Config handler for LlamaNemotronVL embedding models."""

    @staticmethod
    def verify_and_update_model_config(model_config: "ModelConfig") -> None:
        """Disable causal attention and choose the sequence pooling type.

        Mutates ``model_config.hf_config`` (and its ``llm_config`` /
        ``vision_config`` sub-configs when present) and writes the resolved
        pooling type to ``model_config.pooler_config.seq_pooling_type``.

        The pooling name is looked up on the top-level HF config first, then
        on ``llm_config``; when neither provides a value, ``"avg"`` is used.

        Args:
            model_config: Model configuration to verify and update in place.

        Raises:
            ValueError: If the configured pooling name is not one of
                ``"avg"``, ``"cls"`` or ``"last"``.
        """
        from vllm.config.pooler import SequencePoolingType

        hf_config = model_config.hf_config
        has_llm_config = hasattr(hf_config, "llm_config")

        # Set bidirectional attention on the language model config
        hf_config.is_causal = False
        if has_llm_config:
            hf_config.llm_config.is_causal = False

        # Expose the vision tower's patch size on the top-level config.
        if hasattr(hf_config, "vision_config"):
            hf_config.patch_size = hf_config.vision_config.patch_size

        # Map HF pooling names to vLLM sequence pooling types.
        pooling_type_map: dict[str, SequencePoolingType] = {
            "avg": "MEAN",
            "cls": "CLS",
            "last": "LAST",
        }

        # Get pooling type from config (check both top-level and llm_config)
        pooling = getattr(hf_config, "pooling", None)
        if pooling is None and has_llm_config:
            pooling = getattr(hf_config.llm_config, "pooling", None)
        if pooling is None:
            # Apply the default uniformly: previously it was only used when
            # llm_config existed but lacked the attribute, so a missing
            # llm_config or an explicit ``pooling = None`` raised instead.
            pooling = "avg"

        pooling_type = pooling_type_map.get(pooling)
        if pooling_type is None:
            raise ValueError(f"pool_type {pooling!r} not supported")

        model_config.pooler_config.seq_pooling_type = pooling_type
class NomicBertModelConfig(VerifyAndUpdateConfig):
@staticmethod
def verify_and_update_model_config(model_config: "ModelConfig") -> None:
@@ -619,6 +655,7 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"Gemma3TextModel": Gemma3TextModelConfig,
"LlamaBidirectionalForSequenceClassification": LlamaBidirectionalConfig,
"LlamaBidirectionalModel": LlamaBidirectionalConfig,
"LlamaNemotronVLModel": LlamaNemotronVLConfig,
"NomicBertModel": NomicBertModelConfig,
"Qwen2ForProcessRewardModel": Qwen2ForProcessRewardModelConfig,
"Qwen2ForRewardModel": Qwen2ForRewardModelConfig,