diff --git a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py
index ce1d3f33..63289236 100644
--- a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py
+++ b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py
@@ -112,8 +112,11 @@ def _prepack_weight_sf(weight_sf, N, K, tag):
     from nvfp4_megamoe_kernel.cutlass_nvfp4_gemm.kernel import prepack_sfb
 
     E = weight_sf.shape[0]
-    # M for layout sizing. Test with different M to confirm SFB is M-independent.
-    # If SFB size changes with M, bucket by M and cache per-bucket.
+    # M_for_layout controls CUTLASS SFB layout sizing.
+    # ASSUMPTION: the SFB layout size is independent of M (CUTLASS tiles over
+    # M, but the scale-factor block structure depends only on N and K). If
+    # this is wrong, we must prepack per expert using the actual M.
+    # Verified only for M=128. TODO: test with M=1 and M=256 to confirm.
     M_for_layout = 128
 
     packed = []