[Neuron] Add Multi-Modal model support for Neuron (#18921)

Signed-off-by: Satyajith Chilappagari <satchill@amazon.com>
Co-authored-by: Ashraf Mahgoub <ashymahg@amazon.com>
Co-authored-by: Rohith Nallamaddi <nalrohit@amazon.com>
Co-authored-by: FeliciaLuo <luof@amazon.com>
Co-authored-by: Elaine Zhao <elaineyz@amazon.com>
commit 2a50ef5760 (parent b8b904795d)
Author: Satyajith Chilappagari
Date: 2025-05-31 03:39:11 -07:00
Committed by: GitHub

5 changed files with 235 additions and 46 deletions

@@ -1360,6 +1360,14 @@ class ModelConfig:
     @property
     def is_encoder_decoder(self) -> bool:
         """Extract the HF encoder/decoder model flag."""
+        # For Mllama, vLLM overrides HF's is_encoder_decoder flag and sets
+        # it to True to enable cross-attention. Neuron needs all multimodal
+        # data to be in the decoder and does not need to explicitly enable
+        # cross-attention.
+        if (current_platform.is_neuron()
+                and self.hf_config.model_type == "mllama"):
+            return False
+
         return is_encoder_decoder(self.hf_config)
 
     @property
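
The platform check above short-circuits before vLLM's generic helper runs: on Neuron, Mllama reports itself as decoder-only, so all multimodal data stays in the decoder. Below is a minimal, self-contained sketch of that control flow under stated assumptions; FakePlatform, FakeHFConfig, hf_flag, and resolve_is_encoder_decoder are hypothetical stand-ins for vLLM's current_platform, the Hugging Face config object, the is_encoder_decoder() helper, and the property body, not vLLM's actual API.

from dataclasses import dataclass


@dataclass
class FakeHFConfig:
    # Illustrative stand-in for the Hugging Face model config.
    model_type: str
    is_encoder_decoder: bool = False


class FakePlatform:
    # Illustrative stand-in for vllm.platforms.current_platform.
    def __init__(self, neuron: bool) -> None:
        self._neuron = neuron

    def is_neuron(self) -> bool:
        return self._neuron


def hf_flag(hf_config: FakeHFConfig) -> bool:
    # Simplified stand-in for vLLM's is_encoder_decoder() helper: per the
    # comment in the diff, vLLM forces the flag to True for Mllama so the
    # cross-attention path is enabled on non-Neuron platforms.
    if hf_config.model_type == "mllama":
        return True
    return hf_config.is_encoder_decoder


def resolve_is_encoder_decoder(platform: FakePlatform,
                               hf_config: FakeHFConfig) -> bool:
    # Mirrors the property body: Neuron keeps all multimodal data in the
    # decoder, so the Mllama override is undone and the model is treated
    # as decoder-only.
    if platform.is_neuron() and hf_config.model_type == "mllama":
        return False
    return hf_flag(hf_config)


cfg = FakeHFConfig(model_type="mllama")
assert resolve_is_encoder_decoder(FakePlatform(neuron=True), cfg) is False
assert resolve_is_encoder_decoder(FakePlatform(neuron=False), cfg) is True

Gating this in ModelConfig rather than in model code means every downstream component that branches on is_encoder_decoder automatically takes the decoder-only path on Neuron, with no Neuron-specific changes elsewhere.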