[VLM] Enable overriding whether post layernorm is used in vision encoder + fix quant args (#9217)

Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Cyrus Leung
2024-10-23 19:27:37 +08:00
committed by GitHub
parent 3ff57ebfca
commit c18e1a3418
18 changed files with 551 additions and 253 deletions

View File

@@ -4,10 +4,13 @@
# Copyright (c) 2024 NVIDIA
# Licensed under Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
from typing import Optional
import torch.nn as nn
from transformers import PretrainedConfig
from vllm.inputs import INPUT_REGISTRY
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from .intern_vit import InternVisionModel
@@ -56,9 +59,11 @@ class NVLM_D_Model(InternVLChatModel):
)
def _init_vision_model(self, config: PretrainedConfig,
                       quant_config: Optional[QuantizationConfig],
                       num_hidden_layers: int):
    """Build the InternViT vision tower used by NVLM-D.

    Args:
        config: Top-level HF config; its ``vision_config`` sub-config is
            forwarded to the vision encoder.
        quant_config: Optional quantization settings passed through to the
            encoder's linear layers.
        num_hidden_layers: Overrides the number of encoder layers to build.
    """
    vision_config = config.vision_config
    # The checkpoint's attention-head count is padded with 7 extra dummy
    # heads so the total number of heads becomes divisible by 8.
    return InternVisionModel(
        vision_config,
        quant_config=quant_config,
        num_hidden_layers_override=num_hidden_layers,
        num_dummy_heads=7,
    )