From 127ded0a9e3dce17fdcff6a105900439c076b9ba Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Thu, 11 Sep 2025 11:52:24 -0700 Subject: [PATCH] [Ultravox] Use wrapped_model_config to instantiate inner model (#24679) Signed-off-by: Peter Salas --- vllm/model_executor/models/ultravox.py | 4 ++-- vllm/transformers_utils/configs/ultravox.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index 988530903..9e28b0c44 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -418,7 +418,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() - config = vllm_config.model_config.hf_config + config: UltravoxConfig = vllm_config.model_config.hf_config multimodal_config = vllm_config.model_config.multimodal_config self.config = config self.multi_modal_config = multimodal_config @@ -438,7 +438,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): self.multi_modal_projector = UltravoxProjector(config) self.language_model = init_vllm_registered_model( vllm_config=vllm_config, - hf_config=config.text_config, + hf_config=config.wrapped_model_config, prefix=maybe_prefix(prefix, "language_model"), ) if config.text_model_id is not None: diff --git a/vllm/transformers_utils/configs/ultravox.py b/vllm/transformers_utils/configs/ultravox.py index 71266b932..e67479516 100644 --- a/vllm/transformers_utils/configs/ultravox.py +++ b/vllm/transformers_utils/configs/ultravox.py @@ -46,7 +46,7 @@ class UltravoxConfig(transformers.PretrainedConfig): projector or at the end. Versions v0.4.1 and below use `False`, but v0.5 and above use `True`. """ - + wrapped_model_config: transformers.PretrainedConfig model_type = "ultravox" audio_token = "<|audio|>" is_composition = False @@ -113,9 +113,8 @@ class UltravoxConfig(transformers.PretrainedConfig): return super().__setattr__(key, value) @property - def text_config(self) -> Optional[transformers.PretrainedConfig]: + def text_config(self) -> transformers.PretrainedConfig: # When Ultravox wraps a multi-modal model (e.g. Gemma), we instantiate # the full model, but the text config is the text config of the inner # model. - return (self.wrapped_model_config.get_text_config() - if self.wrapped_model_config else None) + return self.wrapped_model_config.get_text_config()