Support Audio Extraction from MP4 Video for Nemotron Nano VL (#35539)

Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Signed-off-by: Andrii Skliar <askliar@nvidia.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: Andrii <askliar@nvidia.com>
Co-authored-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Co-authored-by: Andrii Skliar <askliar@oci-nrt-cs-001-vscode-01.cm.cluster>
Co-authored-by: Andrii <askliar@nvidia.com>
Co-authored-by: root <root@pool0-03748.cm.cluster>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: root <root@pool0-02416.cm.cluster>
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
Co-authored-by: root <root@pool0-04880.cm.cluster>
This commit is contained in:
Andrii Skliar
2026-03-04 08:20:33 +01:00
committed by GitHub
parent 9e0f44bec4
commit 5d199ac8f2
5 changed files with 225 additions and 1 deletions

View File

@@ -622,6 +622,15 @@ class NemotronHForCausalLMConfig(VerifyAndUpdateConfig):
cache_config.mamba_ssm_cache_dtype = mamba_ssm_cache_dtype
class NemotronHNanoVLV2Config(VerifyAndUpdateConfig):
@staticmethod
def verify_and_update_model_config(model_config: "ModelConfig") -> None:
mm_config = model_config.multimodal_config
if mm_config is not None:
video_kwargs = mm_config.media_io_kwargs.setdefault("video", {})
video_kwargs.setdefault("video_backend", "nemotron_vl")
class Qwen3_5ForConditionalGenerationConfig(VerifyAndUpdateConfig):
@staticmethod
def verify_and_update_config(vllm_config: "VllmConfig") -> None:
@@ -661,6 +670,7 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"GteNewModel": GteNewModelConfig,
"GteNewForSequenceClassification": GteNewModelConfig,
"Gemma3TextModel": Gemma3TextModelConfig,
"NemotronH_Nano_VL_V2": NemotronHNanoVLV2Config,
"LlamaBidirectionalForSequenceClassification": LlamaBidirectionalConfig,
"LlamaBidirectionalModel": LlamaBidirectionalConfig,
"LlamaNemotronVLModel": LlamaNemotronVLConfig,