diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
index 68393f768..b6441552a 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -912,7 +912,7 @@ class BatchedTritonExperts(mk.FusedMoEExpertsModular):
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        return False
+        return True
 
     @staticmethod
     def _supports_quant_scheme(
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 023cdd0b4..ee321f241 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1944,7 +1944,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        return False
+        return True
 
     @staticmethod
     def _supports_quant_scheme(
@@ -1983,6 +1983,9 @@ class TritonExperts(mk.FusedMoEExpertsModular):
             MoEActivation.GELU,
             MoEActivation.SWIGLUOAI,
             MoEActivation.SWIGLUSTEP,
+            MoEActivation.SILU_NO_MUL,
+            MoEActivation.GELU_NO_MUL,
+            MoEActivation.RELU2_NO_MUL,
         ]
 
     @staticmethod
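
For reference, here is a minimal sketch of what the gated "act-and-mul" activations compute versus the non-gated *_NO_MUL variants this diff enables, assuming the usual convention that the gated form consumes a 2N-wide intermediate tensor while the non-gated form leaves the width unchanged. The helper names (silu_and_mul, silu_no_mul, relu2_no_mul) are illustrative only, not vLLM APIs:

# Illustrative sketch, not vLLM code: contrasts a gated "act-and-mul"
# activation with the non-gated *_NO_MUL variants enabled above.
import torch
import torch.nn.functional as F

def silu_and_mul(x: torch.Tensor) -> torch.Tensor:
    # Gated (SwiGLU-style) form: input is 2N wide, output is N wide.
    gate, up = x.chunk(2, dim=-1)
    return F.silu(gate) * up

def silu_no_mul(x: torch.Tensor) -> torch.Tensor:
    # Non-gated form: elementwise activation, width unchanged.
    return F.silu(x)

def relu2_no_mul(x: torch.Tensor) -> torch.Tensor:
    # ReLU-squared, again without the gating multiply.
    return torch.square(F.relu(x))

# Shape check: the gated form halves the hidden dim, *_NO_MUL preserves it.
x = torch.randn(4, 8)
assert silu_and_mul(x).shape == (4, 4)
assert silu_no_mul(x).shape == (4, 8)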