[Model] Add Gemma3 GGUF multimodal support (#27772)

Signed-off-by: Luciano Martins <lucianommartins@users.noreply.github.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Luciano Martins <lucianommartins@users.noreply.github.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Luciano Martins
2025-11-18 13:56:29 -03:00
committed by GitHub
parent 49a986ecd4
commit c2612371ad
14 changed files with 752 additions and 86 deletions

View File

@@ -477,6 +477,17 @@ def is_interleaved(config: PretrainedConfig) -> bool:
return False
def uses_custom_attention_masks(config: PretrainedConfig) -> bool:
    """Detect if model uses custom attention mask generation for multimodal.

    Some multimodal models require custom attention masks that enable
    bidirectional attention between image tokens while maintaining causal
    attention for text tokens. Currently applies to Gemma3 multimodal models.

    Args:
        config: The model's configuration object; its ``architectures``
            attribute (a list of architecture class names) is consulted
            if present.

    Returns:
        True if the config lists ``Gemma3ForConditionalGeneration`` among
        its architectures, False otherwise (including when the attribute
        is missing or None).
    """
    # `architectures` may be absent, or present but explicitly None, on
    # some configs; normalize both cases to an empty list so the
    # membership test below cannot raise TypeError.
    architectures = getattr(config, "architectures", []) or []
    return "Gemma3ForConditionalGeneration" in architectures
def _maybe_update_auto_config_kwargs(kwargs: dict[str, Any], model_type: str):
"""
Update kwargs for AutoConfig initialization based on model_type