fix: pass granularity_bf16 (not granularity) to interleave_l1_weights in Nvfp4SharedExpert

This commit is contained in:
2026-06-02 08:29:03 +00:00
parent 553275d810
commit 1726cb64a9

View File

@@ -119,7 +119,7 @@ class Nvfp4SharedExpert:
# The fused kernel's SwiGLU epilogue expects granularity-8 interleaved gate/up.
# The unfused path (if _fused_swiglu=False) deinterleaves the GEMM output before splitting.
if self._fused_swiglu:
l1_stacked = interleave_l1_weights(l1_stacked, granularity=8)
l1_stacked = interleave_l1_weights(l1_stacked, granularity_bf16=8)
# Stack weights and convert to K-major
self._l1_mat_b = make_b_k_major(l1_stacked) # (1, K_packed, N_packed)
self._l2_mat_b = make_b_k_major(l2_stacked)