fix: restore col_major_src handling for SFB source layout
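SFB scales arrive as a (K_sf, N) row-major buffer after the transpose + contiguous step in weight_transform.py, which is the same memory layout as a column-major view of the logical (N, K_sf) matrix. The col_major_src flag correctly describes this layout, so do not assume that both sources (SFA and SFB) are stored as (MN, K_sf) row-major.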
This commit is contained in:
@@ -135,18 +135,18 @@ __global__ void remap_sf_to_cutlass_kernel(
|
||||
int mn, k_sf_val, src_idx;
|
||||
|
||||
if (col_major_src) {
|
||||
// source is row-major (K_sf, MN), e.g. SFB stored as (K_sf, N)
|
||||
// Source is (K_sf, MN) row-major in memory — e.g. SFB after transpose+contiguous
|
||||
k_sf_val = tid / MN;
|
||||
mn = tid % MN;
|
||||
src_idx = tid;
|
||||
src_idx = tid; // tid = k_sf_val * MN + mn
|
||||
} else {
|
||||
// source is row-major (MN, K_sf), e.g. SFA stored as (M, K_sf)
|
||||
// Source is (MN, K_sf) row-major — e.g. SFA
|
||||
mn = tid / K_sf;
|
||||
k_sf_val = tid % K_sf;
|
||||
src_idx = tid;
|
||||
src_idx = tid; // tid = mn * K_sf + k_sf_val
|
||||
}
|
||||
|
||||
// Use layout forward mapping: source (mn, k_sf*16) -> dst_idx
|
||||
// Use layout forward mapping: (mn, k_sf*16) -> dst_idx
|
||||
constexpr int LayoutRank = cute::rank_v<decltype(layout_sf.shape())>;
|
||||
int dst_idx = 0;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user