[Bugfix] Fix Triton FusedMoE LoRA (#30585)
Signed-off-by: Xin Yang <xyangx@amazon.com>
@@ -502,16 +502,18 @@ class UnfusedOAITritonExperts(BaseOAITritonExperts):
 )
 
 self.activation(
-    activation, intermediate_cache2, intermediate_cache1.view(-1, N)
+    activation,
+    intermediate_cache2,
+    intermediate_cache1.view(-1, N)[gather_indx.dst_indx],
 )
 
 # matmul_ogs grouped reduction fuse sum across multiple experts:
-# y[dst_ind // n_expts_act, :] += x[src_ind, :]
+# y[dst_indx // n_expts_act, :] += x
 # Need to set n_expts_act to 1 to unfuse moe_sum
 routing_data.n_expts_act = 1
 
 matmul_ogs(
-    intermediate_cache2,
+    intermediate_cache2[gather_indx.src_indx],
     w2,
     self.quant_config.w2_bias,
     routing_data,
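For context on the comment in the diff: matmul_ogs's grouped reduction sums rows that share a destination token inside the matmul itself, so the per-(token, expert) rows never materialize; forcing n_expts_act = 1 turns that reduction into a plain scatter and leaves the sum (moe_sum) to run separately. Below is a minimal standalone sketch of that semantics, assuming PyTorch; num_tokens, top_k, hidden, and the dst_indx tensor are illustrative placeholders, not vLLM's actual shapes or GatherIndx contents.

import torch

num_tokens, top_k, hidden = 3, 2, 4  # illustrative sizes (placeholders)
num_rows = num_tokens * top_k        # one row per (token, expert) pair

x = torch.randn(num_rows, hidden)    # stand-in for per-expert GEMM outputs
dst_indx = torch.arange(num_rows)    # placeholder destination indices

# Fused behavior (n_expts_act == top_k): rows sharing
# dst_indx // n_expts_act are summed into a single token row,
# i.e. y[dst_indx // n_expts_act, :] += x.
y_fused = torch.zeros(num_tokens, hidden)
y_fused.index_add_(0, dst_indx // top_k, x)

# Unfused behavior (n_expts_act forced to 1): every row keeps its own
# output slot, so no cross-expert reduction happens inside the matmul.
y_unfused = torch.zeros(num_rows, hidden)
y_unfused.index_add_(0, dst_indx // 1, x)

# An explicit sum over experts (the separate moe_sum step) then
# reproduces the fused result.
assert torch.allclose(
    y_unfused.view(num_tokens, top_k, hidden).sum(dim=1), y_fused
)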