[Models]: Make Multimodal config implicit in ViT implementation (#31972)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -317,7 +317,6 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
|
||||
with self._mark_tower_model(vllm_config, "image"):
|
||||
self.vision_tower = MoonVitPretrainedModel(
|
||||
config.vision_config,
|
||||
multimodal_config=model_config.multimodal_config,
|
||||
prefix=maybe_prefix(prefix, "vision_tower"),
|
||||
)
|
||||
self.multi_modal_projector = KimiVLMultiModalProjector(
|
||||
|
||||
Reference in New Issue
Block a user