[Bugfix] Fix Triton FusedMoE LoRA (#30585)

Signed-off-by: Xin Yang <xyangx@amazon.com>
Xin Yang
2026-01-09 03:46:59 -08:00
committed by GitHub
parent 1a19e9cd87
commit e7b68f4d6c
3 changed files with 51 additions and 35 deletions
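For context on the change below: a minimal, illustrative sketch (not from the commit; names, shapes, and numbers are assumed) of the grouped reduction that matmul_ogs fuses, y[dst_indx // n_expts_act, :] += x. With n_expts_act set to 1 the reduction degenerates to a per-row scatter, so the sum across experts (moe_sum) stays unfused, which is what the patch relies on after gathering rows by gather_indx.

    # Sketch only: emulates the y[dst_indx // n_expts_act, :] += x reduction
    # described in the diff's comments; not the actual matmul_ogs kernel.
    import torch

    def grouped_reduce(x: torch.Tensor, dst_indx: torch.Tensor, n_expts_act: int) -> torch.Tensor:
        # For every routed row i: y[dst_indx[i] // n_expts_act, :] += x[i, :]
        n_rows_out = int(dst_indx.max()) // n_expts_act + 1
        y = torch.zeros(n_rows_out, x.shape[1], dtype=x.dtype)
        y.index_add_(0, dst_indx // n_expts_act, x)
        return y

    # Toy example: 2 tokens with top-2 routing -> 4 routed rows, hidden size 3.
    x = torch.arange(12, dtype=torch.float32).reshape(4, 3)
    dst_indx = torch.tensor([0, 1, 2, 3])

    # n_expts_act=2: rows belonging to the same token are summed,
    # i.e. moe_sum is fused into the reduction.
    print(grouped_reduce(x, dst_indx, n_expts_act=2).shape)  # torch.Size([2, 3])

    # n_expts_act=1: one output row per routed row, no summation,
    # so moe_sum stays unfused and can be applied separately later.
    print(grouped_reduce(x, dst_indx, n_expts_act=1).shape)  # torch.Size([4, 3])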


@@ -502,16 +502,18 @@ class UnfusedOAITritonExperts(BaseOAITritonExperts):
         )
         self.activation(
-            activation, intermediate_cache2, intermediate_cache1.view(-1, N)
+            activation,
+            intermediate_cache2,
+            intermediate_cache1.view(-1, N)[gather_indx.dst_indx],
         )
         # matmul_ogs grouped reduction fuse sum across multiple experts:
-        # y[dst_ind // n_expts_act, :] += x[src_ind, :]
+        # y[dst_indx // n_expts_act, :] += x
         # Need to set n_expts_act to 1 to unfuse moe_sum
         routing_data.n_expts_act = 1
         matmul_ogs(
-            intermediate_cache2,
+            intermediate_cache2[gather_indx.src_indx],
             w2,
             self.quant_config.w2_bias,
             routing_data,