[Misc] Enable Paligemma's PrefixLM attention mask computation (#31725)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
@@ -1097,9 +1097,7 @@ class ModelConfig:
         """Whether to use bidirectional attention for mm positions."""
         MM_PREFIX_LM_MODELS = (
             "gemma3",
-            # TODO(Isotr0py): Disable paligemma for now before
-            # we supports soft cap attention for FlexAttention
-            # "paligemma",
+            "paligemma",
         )
         if not hasattr(self.hf_config, "model_type"):
             return False
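For context, the PrefixLM behavior this list gates means that multimodal prefix positions (e.g. image tokens) attend to one another bidirectionally, while text positions keep the usual causal mask. The sketch below is a minimal illustration of that mask shape, not vLLM's actual implementation; the function name build_prefix_lm_mask, its signature, and the True-means-attend convention are assumptions made for the example.

import torch

def build_prefix_lm_mask(is_mm_prefix: torch.Tensor) -> torch.Tensor:
    """Illustrative PrefixLM attention mask (not vLLM's code).

    is_mm_prefix: bool tensor of shape [seq_len]; True marks
    multimodal prefix positions such as image tokens.
    Returns a [seq_len, seq_len] bool mask where entry (i, j)
    is True if query position i may attend to key position j.
    """
    seq_len = is_mm_prefix.shape[0]
    # Standard causal mask: each position attends to itself and the past.
    causal = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
    # Bidirectional block over the multimodal prefix: prefix tokens may
    # attend to every other prefix token, including future ones.
    bidirectional = is_mm_prefix.unsqueeze(0) & is_mm_prefix.unsqueeze(1)
    return causal | bidirectional

# Example: a 3-token image prefix followed by 3 text tokens.
mask = build_prefix_lm_mask(
    torch.tensor([True, True, True, False, False, False])
)
# Rows 0-2 attend bidirectionally within the prefix; rows 3-5 stay causal.

Note the mask alone is only part of the picture: the removed TODO indicates paligemma had been kept disabled until soft-cap attention was supported for FlexAttention, which this commit considers resolved.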