diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index b2cc57cb6..b1be3a376 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -121,10 +121,6 @@ VLM_TEST_SETTINGS = {
         ),
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
-        dtype="bfloat16",
-        marks=[
-            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
-        ],
     ),
     "qwen2_5_vl": VLMTestInfo(
         models=["Qwen/Qwen2.5-VL-3B-Instruct"],
diff --git a/vllm/config/model.py b/vllm/config/model.py
index f98dc48fe..88da91058 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1097,9 +1097,7 @@ class ModelConfig:
         """Whether to use bidirectional attention for mm positions."""
         MM_PREFIX_LM_MODELS = (
             "gemma3",
-            # TODO(Isotr0py): Disable paligemma for now before
-            # we supports soft cap attention for FlexAttention
-            # "paligemma",
+            "paligemma",
         )
         if not hasattr(self.hf_config, "model_type"):
             return False