test: log canonical indices we write Q to

This commit is contained in:
2026-05-28 12:01:28 +00:00
parent facd509c3c
commit e587e26b06

View File

@@ -58,6 +58,8 @@ test_umma_qk_hd64(const bf16_t* q, const bf16_t* k,
int core_k = d / 8, local_c = d % 8;
int idx = core_k * 16 * 64 + local_c; // tile_mn=0, local_r=0
sQ[idx] = q[d];
// Debug aid: for the first 8 values of d, record the sQ index computed above in s_out[220 + d]
if (d < 8) s_out[220 + d] = (float)idx; // Log the indices we write to
}
// Write K (sk, hd) to sK in canonical layout
for (int i = tid; i < sk * hd; i += N_WARPS * 32) {