From 5840291ea3e468b0e46f3b2ec73dd1e920045fe0 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 12 May 2026 08:08:24 +0000 Subject: [PATCH] fix staging kernel packed_k_mask double-count --- patches/staging_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patches/staging_kernel.py b/patches/staging_kernel.py index 9942eb4..d409d14 100644 --- a/patches/staging_kernel.py +++ b/patches/staging_kernel.py @@ -113,7 +113,7 @@ def _deepseek_v4_stage_mega_moe_inputs_kernel( e2m1_packed = (e2m1_hi << 4 | e2m1_lo).to(tl.uint8) # [BLOCK_K // 2] k_offsets_out = k_block_id * (BLOCK_K // 2) + tl.arange(0, BLOCK_K // 2) - k_mask_out = (k_block_id * BLOCK_K // 2 + k_offsets_out) < (hidden_size // 2) + k_mask_out = k_offsets_out < (hidden_size // 2) tl.store( x_fp4 + token_id * x_stride_m + k_offsets_out * x_stride_k, e2m1_packed,