[Bugfix] Fix prefix strings for quantized VLMs (#9772)

This commit is contained in:
Michael Goin
2024-10-29 19:02:59 -04:00
committed by GitHub
parent 8d7724104a
commit bc73e9821c
20 changed files with 288 additions and 97 deletions

View File

@@ -257,14 +257,20 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal,
# Initialize the vision tower only up to the required feature layer
self.vision_tower = init_vision_tower_for_llava(
-config, quant_config, require_post_norm=False)
+config,
+quant_config,
+require_post_norm=False,
+prefix="vision_tower")
self.vision_resampler = LlavaNextVideoPooler(config)
self.multi_modal_projector = LlavaNextMultiModalProjector(
vision_hidden_size=config.vision_config.hidden_size,
text_hidden_size=config.text_config.hidden_size,
projector_hidden_act=config.projector_hidden_act)
self.language_model = init_vllm_registered_model(
-config.text_config, cache_config, quant_config)
+config.text_config,
+cache_config,
+quant_config,
+prefix="language_model")
self.make_empty_intermediate_tensors = (
self.language_model.model.make_empty_intermediate_tensors)