[VLM] Enable overriding whether post layernorm is used in vision encoder + fix quant args (#9217)

Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Cyrus Leung
2024-10-23 19:27:37 +08:00
committed by GitHub
parent 3ff57ebfca
commit c18e1a3418
18 changed files with 551 additions and 253 deletions

View File

@@ -4,10 +4,13 @@
# Copyright (c) 2024 NVIDIA
# Licensed under Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
from typing import Optional
import torch.nn as nn
from transformers import PretrainedConfig
from vllm.inputs import INPUT_REGISTRY
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from .intern_vit import InternVisionModel
@@ -56,9 +59,11 @@ class NVLM_D_Model(InternVLChatModel):
)
def _init_vision_model(self, config: PretrainedConfig,
                       quant_config: Optional[QuantizationConfig],
                       num_hidden_layers: int):
    """Build the InternViT vision tower used by NVLM-D.

    Args:
        config: Top-level HF config; its ``vision_config`` sub-config is
            forwarded to the vision encoder.
        quant_config: Optional quantization settings passed through to the
            encoder's linear layers.
        num_hidden_layers: Overrides the number of encoder layers to build.
    """
    vision_config = config.vision_config
    # The checkpoint's attention-head count is padded with 7 extra dummy
    # heads so the total number of heads becomes divisible by 8.
    return InternVisionModel(
        vision_config,
        quant_config=quant_config,
        num_hidden_layers_override=num_hidden_layers,
        num_dummy_heads=7,
    )