[VLM] Enable overriding whether post layernorm is used in vision encoder + fix quant args (#9217)
Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -4,10 +4,13 @@
|
||||
# Copyright (c) 2024 NVIDIA
|
||||
# Licensed under Apache 2.0 License [see LICENSE for details]
|
||||
# --------------------------------------------------------
|
||||
from typing import Optional
|
||||
|
||||
import torch.nn as nn
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
from vllm.inputs import INPUT_REGISTRY
|
||||
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
|
||||
from .intern_vit import InternVisionModel
|
||||
@@ -56,9 +59,11 @@ class NVLM_D_Model(InternVLChatModel):
|
||||
)
|
||||
|
||||
def _init_vision_model(self, config: PretrainedConfig,
                       quant_config: Optional[QuantizationConfig],
                       num_hidden_layers: int):
    """Construct the InternViT vision encoder used by NVLM-D.

    NVLM-D appends 7 dummy attention heads to the original head count
    so that the total number of heads is divisible by 8.

    Args:
        config: The composite HF config; only ``vision_config`` is used.
        quant_config: Optional quantization settings forwarded to the
            vision tower.
        num_hidden_layers: Number of transformer layers to instantiate
            (overrides the value in ``vision_config``).
    """
    vision_config = config.vision_config
    return InternVisionModel(
        vision_config,
        quant_config=quant_config,
        num_hidden_layers_override=num_hidden_layers,
        num_dummy_heads=7,
    )
|
||||
|
||||
Reference in New Issue
Block a user