diff --git a/patches/deepseek_v4.py b/patches/deepseek_v4.py index 0e3e0d5..77196c3 100644 --- a/patches/deepseek_v4.py +++ b/patches/deepseek_v4.py @@ -403,7 +403,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel( tl.store( topk_weights_out + token_id * topk_weights_out_stride_m - + topk_weights_out_stride_k, + + topk_offsets * topk_weights_out_stride_k, weights, mask=topk_mask, ) diff --git a/patches/staging_kernel.py b/patches/staging_kernel.py index 06d2969..83476f3 100644 --- a/patches/staging_kernel.py +++ b/patches/staging_kernel.py @@ -156,7 +156,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel( tl.store( topk_weights_out + token_id * topk_weights_out_stride_m - + topk_weights_out_stride_k, + + topk_offsets * topk_weights_out_stride_k, weights, mask=topk_mask, )