fix(rocm): Enable non-gated MoE (is_act_and_mul=False) support on ROCm (#32244)
Signed-off-by: rabi <ramishra@redhat.com>
This commit is contained in:
@@ -448,9 +448,13 @@ class FusedMoE(CustomOp):
|
||||
)
|
||||
|
||||
# ROCm aiter shared experts fusion
|
||||
- self.rocm_aiter_fmoe_enabled = rocm_aiter_ops.is_fused_moe_enabled()
|
||||
+ # AITER only supports gated activations (silu/gelu), so disable it
|
||||
+ # for non-gated MoE (is_act_and_mul=False)
|
||||
+ self.rocm_aiter_fmoe_enabled = (
|
||||
+ rocm_aiter_ops.is_fused_moe_enabled() and is_act_and_mul
|
||||
+ )
|
||||
self.aiter_fmoe_shared_expert_enabled = (
|
||||
- rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
|
||||
+ rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() and is_act_and_mul
|
||||
)
|
||||
|
||||
self.num_fused_shared_experts = (
|
||||
@@ -619,9 +623,9 @@ class FusedMoE(CustomOp):
|
||||
# for heuristic purposes, so it must be initialized first.
|
||||
self.quant_method: FusedMoEMethodBase = _get_quant_method()
|
||||
|
||||
- if not self.moe_config.is_act_and_mul and not current_platform.is_cuda():
|
||||
+ if not self.moe_config.is_act_and_mul and not current_platform.is_cuda_alike():
|
||||
raise NotImplementedError(
|
||||
- "is_act_and_mul=False is supported only for CUDA for now"
|
||||
+ "is_act_and_mul=False is supported only for CUDA and ROCm for now"
|
||||
)
|
||||
|
||||
if self.enable_eplb and not self.quant_method.supports_eplb:
|
||||
|
||||
@@ -8,6 +8,7 @@ from torch.nn import Module
|
||||
|
||||
import vllm.envs as envs
|
||||
import vllm.model_executor.layers.fused_moe.modular_kernel as mk
|
||||
+ from vllm._aiter_ops import rocm_aiter_ops
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.custom_op import CustomOp
|
||||
from vllm.model_executor.layers.fused_moe.config import (
|
||||
@@ -56,6 +57,12 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
use_ep=self.moe.moe_parallel_config.use_ep,
|
||||
use_dp=self.moe.moe_parallel_config.dp_size > 1,
|
||||
)
|
||||
|
||||
+ # AITER only supports gated activations (silu/gelu), so disable it
|
||||
+ # for non-gated MoE (is_act_and_mul=False)
|
||||
+ self.rocm_aiter_moe_enabled = (
|
||||
+ rocm_aiter_ops.is_fused_moe_enabled() and moe.is_act_and_mul
|
||||
+ )
|
||||
self.kernel: mk.FusedMoEModularKernel | None = None
|
||||
|
||||
@property
|
||||
|
||||
Reference in New Issue
Block a user