fix: construct nested coordinate for CuTe layout shape ((32,4), K)
Calling layout_sf(m, k_elem) with flat ints fails with a "Mismatched Ranks" error because the layout shape is ((32,4), K_padded), not (M, K). Decompose m into (inner_m, sub_m) = (m / 4, m % 4) to match the (32,4) sub-shape, and pass the coordinate as make_tuple(make_tuple(inner_m, sub_m), k_elem).
This commit is contained in:
@@ -122,7 +122,15 @@ __global__ void remap_sf_to_cutlass_kernel(
|
||||
int k_group = src_idx % K_sf;
|
||||
int k_elem = k_group * SFVecSize;
|
||||
|
||||
dst[layout_sf(m, k_elem)] = src[src_idx];
|
||||
// Construct a coordinate matching the layout's top-level mode structure.
|
||||
// The layout shape from tile_to_shape(SfAtom{}, make_shape(MN, K), Step<_2, _1>)
|
||||
// has two top-level modes:
|
||||
// Mode 0 (M): shape (32, 4) — the SfAtom's M sub-structure
|
||||
// Mode 1 (K): int — the K dimension
|
||||
// For mode 0, we decompose m into (inner_m, sub_m) = (m / 4, m % 4)
|
||||
// For mode 1, we pass k_elem directly.
|
||||
auto coord = cute::make_tuple(cute::make_tuple(m / 4, m % 4), k_elem);
|
||||
dst[layout_sf(coord)] = src[src_idx];
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Reference in New Issue
Block a user