From fa028207aa9d4baa6cfc4863f6f54c4277884e6e Mon Sep 17 00:00:00 2001
From: Shaun Kotek <93727115+shaunkotek@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:01:18 +0200
Subject: [PATCH] Fix/resupport nongated fused moe triton (#36412)

Signed-off-by: Shaun Kotek - Nvidia
Signed-off-by: Natan Bagrov
Signed-off-by: Daniel Serebrenik
Signed-off-by: zjy0516
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: yewentao256
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: liweiguang
Signed-off-by: wang.yuqi
Signed-off-by: wang.yuqi
Signed-off-by: Alex Brooks
Signed-off-by: DarkLight1337
Signed-off-by: cong-or
Signed-off-by: Tushar Shetty
Signed-off-by: Tushar Shetty <54362365+tusharshetty61@users.noreply.github.com>
Signed-off-by: jiang1.li
Signed-off-by: zhenwei-intel
Signed-off-by: Xin Yang
Signed-off-by: Kevin H. Luu
Signed-off-by: Isotr0py
Co-authored-by: nvnbagrov
Co-authored-by: Sage <80211083+sagearc@users.noreply.github.com>
Co-authored-by: danisereb
Co-authored-by: Jiangyun Zhu
Co-authored-by: Kunshang Ji
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Weiguang Li
Co-authored-by: Claude Opus 4.6
Co-authored-by: Li, Jiang
Co-authored-by: wang.yuqi
Co-authored-by: Alex Brooks
Co-authored-by: Cyrus Leung
Co-authored-by: cong-or
Co-authored-by: Tushar Shetty <54362365+tusharshetty61@users.noreply.github.com>
Co-authored-by: liuzhenwei
Co-authored-by: Xin Yang <105740670+xyang16@users.noreply.github.com>
Co-authored-by: Kevin H. Luu
Co-authored-by: Isotr0py
---
 vllm/model_executor/layers/fused_moe/fused_batched_moe.py | 2 +-
 vllm/model_executor/layers/fused_moe/fused_moe.py         | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
index 68393f768..b6441552a 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -912,7 +912,7 @@ class BatchedTritonExperts(mk.FusedMoEExpertsModular):
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        return False
+        return True
 
     @staticmethod
     def _supports_quant_scheme(
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 023cdd0b4..ee321f241 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1944,7 +1944,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        return False
+        return True
 
     @staticmethod
     def _supports_quant_scheme(
@@ -1983,6 +1983,9 @@ class TritonExperts(mk.FusedMoEExpertsModular):
             MoEActivation.GELU,
             MoEActivation.SWIGLUOAI,
             MoEActivation.SWIGLUSTEP,
+            MoEActivation.SILU_NO_MUL,
+            MoEActivation.GELU_NO_MUL,
+            MoEActivation.RELU2_NO_MUL,
         ]

     @staticmethod
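
Background on the change (not part of the patch): the two flags flipped
above declare that the Triton expert kernels again accept non-gated
activations, and the new MoEActivation entries list which ones. Below is a
minimal dense-PyTorch sketch of the distinction, assuming the usual MoE MLP
layout; the function names, weight shapes, and the reading of RELU2 as
squared ReLU are illustrative assumptions, not vLLM's API, and the real
TritonExperts path runs the equivalent computation fused in Triton.

    import torch
    import torch.nn.functional as F

    def expert_mlp_gated(x, w1, w2):
        # Gated ("act-and-mul") path, e.g. SWIGLU: w1 projects to
        # 2 * intermediate_size; the activated half is multiplied
        # elementwise by the gate half before the down projection.
        h = x @ w1                        # [tokens, 2 * inter]
        gate, up = h.chunk(2, dim=-1)     # two [tokens, inter] halves
        return (F.silu(gate) * up) @ w2   # [tokens, hidden]

    def expert_mlp_non_gated(x, w1, w2, act="silu"):
        # Non-gated ("no act and mul") path, e.g. SILU_NO_MUL: w1
        # projects to intermediate_size only, and the activation is
        # applied directly with no elementwise gate multiplication.
        h = x @ w1                        # [tokens, inter]
        if act == "silu":                 # SILU_NO_MUL
            h = F.silu(h)
        elif act == "gelu":               # GELU_NO_MUL
            h = F.gelu(h)
        elif act == "relu2":              # RELU2_NO_MUL, assumed to be
            h = F.relu(h) ** 2            # squared ReLU
        return h @ w2                     # [tokens, hidden]

    # Usage: both paths map [tokens, hidden] -> [tokens, hidden], but
    # the non-gated w1 is half the width of the gated one.
    hidden, inter = 64, 128
    x = torch.randn(4, hidden)
    w1_gated, w1_plain = torch.randn(hidden, 2 * inter), torch.randn(hidden, inter)
    w2 = torch.randn(inter, hidden)
    print(expert_mlp_gated(x, w1_gated, w2).shape)      # torch.Size([4, 64])
    print(expert_mlp_non_gated(x, w1_plain, w2).shape)  # torch.Size([4, 64])

Returning True from _supports_no_act_and_mul therefore only signals that
the kernels can skip the gate multiplication; the per-activation entries
added to the supported-activation list are what actually expose SILU,
GELU, and ReLU2 in their non-gated forms.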