From 86483ca7749b3d7a2ae16283a7896c203983f1ef Mon Sep 17 00:00:00 2001
From: tomeras91 <57313761+tomeras91@users.noreply.github.com>
Date: Thu, 5 Mar 2026 19:49:05 +0200
Subject: [PATCH] [Bugfix] Disable FlashInfer TRTLLM BF16 path for non-gated MoE (#36146)

Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
---
 .../layers/fused_moe/flashinfer_trtllm_moe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
index 6765e3613..d04e040c8 100644
--- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
@@ -25,12 +25,12 @@ def _supports_current_device() -> bool:
 
 
 def _supports_no_act_and_mul() -> bool:
-    """Supports non-gated MoE."""
-    return True
+    """BF16 kernels do not support non-gated MoE"""
+    return False
 
 
 def _supports_activation(activation: MoEActivation) -> bool:
-    return activation in [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL]
+    return activation in [MoEActivation.SILU]
 
 
 def _supports_routing_method_bf16(