fix: restore col_major_src handling for SFB source layout

SFB scales arrive as (K_sf, N) row-major after transpose+contiguous
in weight_transform.py, and the col_major_src flag correctly describes
that layout. Don't assume both sources (SFA and SFB) are (MN, K_sf):
only SFA is.
This commit is contained in:
2026-05-15 21:19:58 +00:00
parent 63e67e1025
commit f6fd549800

View File

@@ -135,18 +135,18 @@ __global__ void remap_sf_to_cutlass_kernel(
int mn, k_sf_val, src_idx;
if (col_major_src) {
// source is row-major (K_sf, MN), e.g. SFB stored as (K_sf, N)
// Source is (K_sf, MN) row-major in memory — e.g. SFB after transpose+contiguous
k_sf_val = tid / MN;
mn = tid % MN;
src_idx = tid;
src_idx = tid; // tid = k_sf_val * MN + mn
} else {
// source is row-major (MN, K_sf), e.g. SFA stored as (M, K_sf)
// Source is (MN, K_sf) row-major — e.g. SFA
mn = tid / K_sf;
k_sf_val = tid % K_sf;
src_idx = tid;
src_idx = tid; // tid = mn * K_sf + k_sf_val
}
// Use layout forward mapping: source (mn, k_sf*16) -> dst_idx
// Use layout forward mapping: (mn, k_sf*16) -> dst_idx
constexpr int LayoutRank = cute::rank_v<decltype(layout_sf.shape())>;
int dst_idx = 0;