From 8dc917c498d8a90548852b29c0f6efde294b0a5f Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 11 May 2026 21:02:19 +0000 Subject: [PATCH] fix: topk_weights_out store missing topk_offsets multiplier --- patches/deepseek_v4.py | 2 +- patches/staging_kernel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/patches/deepseek_v4.py b/patches/deepseek_v4.py index 0e3e0d5..77196c3 100644 --- a/patches/deepseek_v4.py +++ b/patches/deepseek_v4.py @@ -403,7 +403,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel( tl.store( topk_weights_out + token_id * topk_weights_out_stride_m - + topk_weights_out_stride_k, + + topk_offsets * topk_weights_out_stride_k, weights, mask=topk_mask, ) diff --git a/patches/staging_kernel.py b/patches/staging_kernel.py index 06d2969..83476f3 100644 --- a/patches/staging_kernel.py +++ b/patches/staging_kernel.py @@ -156,7 +156,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel( tl.store( topk_weights_out + token_id * topk_weights_out_stride_m - + topk_weights_out_stride_k, + + topk_offsets * topk_weights_out_stride_k, weights, mask=topk_mask, )