fix: pass granularity_bf16 (not granularity) to interleave_l1_weights in the shared expert (SE)
This commit is contained in:
@@ -119,7 +119,7 @@ class Nvfp4SharedExpert:
|
||||
# The fused kernel's SwiGLU epilogue expects granularity-8 interleaved gate/up.
|
||||
# The unfused path (if _fused_swiglu=False) deinterleaves the GEMM output before splitting.
|
||||
if self._fused_swiglu:
|
||||
l1_stacked = interleave_l1_weights(l1_stacked, granularity=8)
|
||||
l1_stacked = interleave_l1_weights(l1_stacked, granularity_bf16=8)
|
||||
# Stack weights and convert to K-major
|
||||
self._l1_mat_b = make_b_k_major(l1_stacked) # (1, K_packed, N_packed)
|
||||
self._l2_mat_b = make_b_k_major(l2_stacked)
|
||||
|
||||
Reference in New Issue
Block a user