test: log canonical indices we write Q to
This commit is contained in:
@@ -58,6 +58,8 @@ test_umma_qk_hd64(const bf16_t* q, const bf16_t* k,
|
||||
int core_k = d / 8, local_c = d % 8;
|
||||
int idx = core_k * 16 * 64 + local_c; // tile_mn=0, local_r=0
|
||||
sQ[idx] = q[d];
|
||||
// Also record the computed sQ index for the first 8 elements in s_out for verification
|
||||
if (d < 8) s_out[220 + d] = (float)idx; // Log the indices we write to
|
||||
}
|
||||
// Write K (sk, hd) to sK in canonical layout
|
||||
for (int i = tid; i < sk * hd; i += N_WARPS * 32) {
|
||||
|
||||
Reference in New Issue
Block a user