From 489c62015986f2045b52e53df986d6c2bea31ca0 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 15 May 2026 10:13:19 +0000 Subject: [PATCH] docs: document M_for_layout=128 assumption in _prepack_weight_sf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _prepack_weight_sf sizes the prepacked SFB layout with a fixed M_for_layout=128, assuming the layout size does not depend on M. That assumption is unverified for other values: only M=128 has been tested. Added a TODO to test with M=1 and M=256. --- src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py index ce1d3f33..63289236 100644 --- a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py +++ b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py @@ -112,8 +112,11 @@ def _prepack_weight_sf(weight_sf, N, K, tag): from nvfp4_megamoe_kernel.cutlass_nvfp4_gemm.kernel import prepack_sfb E = weight_sf.shape[0] - # M for layout sizing. Test with different M to confirm SFB is M-independent. - # If SFB size changes with M, bucket by M and cache per-bucket. + # M_for_layout controls CUTLASS SFB layout sizing. + # ASSUMPTION: SFB layout size is M-independent (CUTLASS tiling is over M + # but the scale factor block structure depends on N,K only). If this is + # wrong, we need to prepack per-expert with actual M. Verified only for + # M=128 — TODO: test with M=1, M=256 to confirm. M_for_layout = 128 packed = []