[Model] Ultravox Model: Support v0.5 Release (#12912)

Signed-off-by: Farzad Abdolhosseini <farzad@fixie.ai>
2025-02-10 14:02:48 -08:00
parent 2ae889052c
commit 08b2d845d6
12 changed files with 36 additions and 22 deletions
--- a/vllm/transformers_utils/configs/ultravox.py
+++ b/vllm/transformers_utils/configs/ultravox.py
@@ -37,6 +37,10 @@ class UltravoxConfig(transformers.PretrainedConfig):
            The LoRA configuration for finetuning the text model.
        audio_model_lora_config (`LoraConfigSimplified`, *optional*):
            The LoRA configuration for finetuning the audio model.
+        projector_ln_mid (`bool`, *optional*, defaults to `False`):
+            Whether to apply layer normalization in the middle of the
+            projector (`True`) or at the end (`False`). Versions v0.4.1
+            and below use `False`, but v0.5 and above use `True`.
    """

    model_type = "ultravox"
@@ -56,6 +60,7 @@ class UltravoxConfig(transformers.PretrainedConfig):
        projector_act: str = "swiglu",
        text_model_lora_config: Optional[Dict[str, Any]] = None,
        audio_model_lora_config: Optional[Dict[str, Any]] = None,
+        projector_ln_mid: bool = False,
        **kwargs,
    ):
        self.ignore_index = ignore_index
@@ -68,6 +73,7 @@ class UltravoxConfig(transformers.PretrainedConfig):
        self.stack_factor = stack_factor
        self.norm_init = norm_init
        self.projector_act = projector_act
+        self.projector_ln_mid = projector_ln_mid

        if text_model_id is not None:
            # Avoid circular import