diff --git a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/cutlass_nvfp4_gemm.cu b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/cutlass_nvfp4_gemm.cu index da9196b2..b880cfb4 100644 --- a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/cutlass_nvfp4_gemm.cu +++ b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/cutlass_nvfp4_gemm.cu @@ -125,7 +125,7 @@ __global__ void remap_sf_to_cutlass_kernel( bool col_major_src = false // true if source is (K_sf, MN) row-major ) { int dst_idx = blockIdx.x * blockDim.x + threadIdx.x; - int total = cute::size(layout_sf); + int total = cute::cosize(layout_sf); if (dst_idx >= total) return; auto coord = cute::idx2crd(dst_idx, layout_sf.shape(), layout_sf.stride());