fix(rocm): Enable non-gated MoE (is_act_and_mul=False) support on ROCm (#32244)

Signed-off-by: rabi <ramishra@redhat.com>
This commit is contained in:
Rabi Mishra
2026-01-16 13:01:10 +05:30
committed by GitHub
parent 03da3b52ef
commit b66b0d6abb
2 changed files with 15 additions and 4 deletions

View File

@@ -448,9 +448,13 @@ class FusedMoE(CustomOp):
)
# ROCm aiter shared experts fusion
self.rocm_aiter_fmoe_enabled = rocm_aiter_ops.is_fused_moe_enabled()
# AITER only supports gated activations (silu/gelu), so disable it
# for non-gated MoE (is_act_and_mul=False)
self.rocm_aiter_fmoe_enabled = (
rocm_aiter_ops.is_fused_moe_enabled() and is_act_and_mul
)
self.aiter_fmoe_shared_expert_enabled = (
rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() and is_act_and_mul
)
self.num_fused_shared_experts = (
@@ -619,9 +623,9 @@ class FusedMoE(CustomOp):
# for heuristic purposes, so it must be initialized first.
self.quant_method: FusedMoEMethodBase = _get_quant_method()
if not self.moe_config.is_act_and_mul and not current_platform.is_cuda():
if not self.moe_config.is_act_and_mul and not current_platform.is_cuda_alike():
raise NotImplementedError(
"is_act_and_mul=False is supported only for CUDA for now"
"is_act_and_mul=False is supported only for CUDA and ROCm for now"
)
if self.enable_eplb and not self.quant_method.supports_eplb:

View File

@@ -8,6 +8,7 @@ from torch.nn import Module
import vllm.envs as envs
import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm._aiter_ops import rocm_aiter_ops
from vllm.logger import init_logger
from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.layers.fused_moe.config import (
@@ -56,6 +57,12 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
use_ep=self.moe.moe_parallel_config.use_ep,
use_dp=self.moe.moe_parallel_config.dp_size > 1,
)
# AITER only supports gated activations (silu/gelu), so disable it
# for non-gated MoE (is_act_and_mul=False)
self.rocm_aiter_moe_enabled = (
rocm_aiter_ops.is_fused_moe_enabled() and moe.is_act_and_mul
)
self.kernel: mk.FusedMoEModularKernel | None = None
@property