fix: topk_weights_out store missing topk_offsets multiplier
This commit is contained in:
@@ -403,7 +403,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel(
|
||||
tl.store(
|
||||
topk_weights_out
|
||||
+ token_id * topk_weights_out_stride_m
|
||||
- + topk_weights_out_stride_k,
|
||||
+ + topk_offsets * topk_weights_out_stride_k,
|
||||
weights,
|
||||
mask=topk_mask,
|
||||
)
|
||||
|
||||
@@ -156,7 +156,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel(
|
||||
tl.store(
|
||||
topk_weights_out
|
||||
+ token_id * topk_weights_out_stride_m
|
||||
- + topk_weights_out_stride_k,
|
||||
+ + topk_offsets * topk_weights_out_stride_k,
|
||||
weights,
|
||||
mask=topk_mask,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user