fix: set kGranK=32 in the C++ binding's SF layout check (it was still 16, left over from the old block16 code)

2026-05-11 09:09:32 +00:00
parent cd7a612175
commit b856c57ba6
1 changed file with 2 additions and 2 deletions
--- a/csrc/apis/mega_nvfp4.hpp
+++ b/csrc/apis/mega_nvfp4.hpp
@@ -175,8 +175,8 @@ static void fp8_nvfp4_mega_moe(
    DG_HOST_ASSERT(l1_weights.is_contiguous() and l2_weights.is_contiguous());

    // Check weight SF layout for UE4M3 packing, MN-major, and TMA alignment
-    // NVFP4: kGranK=16, SF packed as int32 (4 UE4M3 bytes per int32)
-    constexpr int kGranMN = 1, kGranK = 16;
+    // NVFP4 block32: kGranK=32, SF packed as int32 (4 UE4M3 bytes per int32)
+    constexpr int kGranMN = 1, kGranK = 32;
    check_sf_layout(l1_weights_sf, intermediate_hidden * 2, hidden, kGranMN, kGranK,
                    num_experts_per_rank, true, false, torch::kInt);
    check_sf_layout(l2_weights_sf, hidden, intermediate_hidden, kGranMN, kGranK,