From b856c57ba61c66319908aff1150ef39c22163635 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 11 May 2026 09:09:32 +0000 Subject: [PATCH] fix: kGranK=32 in C++ binding (was still 16 from old block16 code) --- csrc/apis/mega_nvfp4.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/apis/mega_nvfp4.hpp b/csrc/apis/mega_nvfp4.hpp index 83b215a..7467519 100644 --- a/csrc/apis/mega_nvfp4.hpp +++ b/csrc/apis/mega_nvfp4.hpp @@ -175,8 +175,8 @@ static void fp8_nvfp4_mega_moe( DG_HOST_ASSERT(l1_weights.is_contiguous() and l2_weights.is_contiguous()); // Check weight SF layout for UE4M3 packing, MN-major, and TMA alignment - // NVFP4: kGranK=16, SF packed as int32 (4 UE4M3 bytes per int32) - constexpr int kGranMN = 1, kGranK = 16; + // NVFP4 block32: kGranK=32, SF packed as int32 (4 UE4M3 bytes per int32) + constexpr int kGranMN = 1, kGranK = 32; check_sf_layout(l1_weights_sf, intermediate_hidden * 2, hidden, kGranMN, kGranK, num_experts_per_rank, true, false, torch::kInt); check_sf_layout(l2_weights_sf, hidden, intermediate_hidden, kGranMN, kGranK,