From b66b0d6abb955f9209a0d88b1dc245f4c1c9ff98 Mon Sep 17 00:00:00 2001
From: Rabi Mishra
Date: Fri, 16 Jan 2026 13:01:10 +0530
Subject: [PATCH] fix(rocm): Enable non-gated MoE (is_act_and_mul=False) support on ROCm (#32244)

Signed-off-by: rabi
---
 vllm/model_executor/layers/fused_moe/layer.py          | 12 ++++++++----
 .../layers/fused_moe/unquantized_fused_moe_method.py   |  7 +++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index fd3f76cb2..702052c96 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -448,9 +448,13 @@ class FusedMoE(CustomOp):
         )
 
         # ROCm aiter shared experts fusion
-        self.rocm_aiter_fmoe_enabled = rocm_aiter_ops.is_fused_moe_enabled()
+        # AITER only supports gated activations (silu/gelu), so disable it
+        # for non-gated MoE (is_act_and_mul=False)
+        self.rocm_aiter_fmoe_enabled = (
+            rocm_aiter_ops.is_fused_moe_enabled() and is_act_and_mul
+        )
         self.aiter_fmoe_shared_expert_enabled = (
-            rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
+            rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() and is_act_and_mul
         )
 
         self.num_fused_shared_experts = (
@@ -619,9 +623,9 @@ class FusedMoE(CustomOp):
         # for heuristic purposes, so it must be initialized first.
         self.quant_method: FusedMoEMethodBase = _get_quant_method()
 
-        if not self.moe_config.is_act_and_mul and not current_platform.is_cuda():
+        if not self.moe_config.is_act_and_mul and not current_platform.is_cuda_alike():
             raise NotImplementedError(
-                "is_act_and_mul=False is supported only for CUDA for now"
+                "is_act_and_mul=False is supported only for CUDA and ROCm for now"
             )
 
         if self.enable_eplb and not self.quant_method.supports_eplb:
diff --git a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
index 40a009e4b..351d63144 100644
--- a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
+++ b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
@@ -8,6 +8,7 @@ from torch.nn import Module
 
 import vllm.envs as envs
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
 from vllm.model_executor.layers.fused_moe.config import (
@@ -56,6 +57,12 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             use_ep=self.moe.moe_parallel_config.use_ep,
             use_dp=self.moe.moe_parallel_config.dp_size > 1,
         )
+
+        # AITER only supports gated activations (silu/gelu), so disable it
+        # for non-gated MoE (is_act_and_mul=False)
+        self.rocm_aiter_moe_enabled = (
+            rocm_aiter_ops.is_fused_moe_enabled() and moe.is_act_and_mul
+        )
         self.kernel: mk.FusedMoEModularKernel | None = None
 
     @property