[Model] Standardize common vision encoders (#31947)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-08 18:33:16 +08:00
parent d1b6fe007f
commit 5576227bc1
19 changed files with 253 additions and 173 deletions
--- a/vllm/model_executor/models/minimax_vl_01.py
+++ b/vllm/model_executor/models/minimax_vl_01.py
@@ -204,7 +204,8 @@ class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, Support
        # TODO: Optionally initializes this for supporting embeddings.
        self.vision_tower = init_vision_tower_for_llava(
            config,
-            quant_config,
+            quant_config=quant_config,
+            multimodal_config=multimodal_config,
            require_post_norm=False,
            prefix=maybe_prefix(prefix, "vision_tower"),
        )