fix: topk_weights_out store missing topk_offsets multiplier

This commit is contained in:
2026-05-11 21:02:19 +00:00
parent 17ba5a9d7b
commit 8dc917c498
2 changed files with 2 additions and 2 deletions

View File

@@ -403,7 +403,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel(
tl.store(
topk_weights_out
+ token_id * topk_weights_out_stride_m
- + topk_weights_out_stride_k,
+ + topk_offsets * topk_weights_out_stride_k,
weights,
mask=topk_mask,
)

View File

@@ -156,7 +156,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel(
tl.store(
topk_weights_out
+ token_id * topk_weights_out_stride_m
- + topk_weights_out_stride_k,
+ + topk_offsets * topk_weights_out_stride_k,
weights,
mask=topk_mask,
)