fix: pass granularity_bf16 (not granularity) to interleave_l1_weights in SE
This commit is contained in:
@@ -119,7 +119,7 @@ class Nvfp4SharedExpert:
|
|||||||
# The fused kernel's SwiGLU epilogue expects granularity-8 interleaved gate/up.
|
# The fused kernel's SwiGLU epilogue expects granularity-8 interleaved gate/up.
|
||||||
# The unfused path (if _fused_swiglu=False) deinterleaves the GEMM output before splitting.
|
# The unfused path (if _fused_swiglu=False) deinterleaves the GEMM output before splitting.
|
||||||
if self._fused_swiglu:
|
if self._fused_swiglu:
|
||||||
l1_stacked = interleave_l1_weights(l1_stacked, granularity=8)
|
l1_stacked = interleave_l1_weights(l1_stacked, granularity_bf16=8)
|
||||||
# Stack weights and convert to K-major
|
# Stack weights and convert to K-major
|
||||||
self._l1_mat_b = make_b_k_major(l1_stacked) # (1, K_packed, N_packed)
|
self._l1_mat_b = make_b_k_major(l1_stacked) # (1, K_packed, N_packed)
|
||||||
self._l2_mat_b = make_b_k_major(l2_stacked)
|
self._l2_mat_b = make_b_k_major(l2_stacked)
|
||||||
|
|||||||
Reference in New Issue
Block a user