test: verify SMEM Q layout by reading back canonical data
This commit is contained in:
@@ -70,7 +70,19 @@ test_umma_qk_hd16(
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Call tcgen05.mma SS
|
||||
// Verify SMEM layout: read back Q from sQ and compare with original
|
||||
if (tid == 0) {
|
||||
// Q row 0 is in canonical layout at:
|
||||
// core(0,0): element offset 0 into sQ, local_r=0, local_c=0..7 → indices 0..7
|
||||
// core(0,1): element offset 16*64=1024 into sQ (not bytes), local_r=0, local_c=0..7 → indices 1024..1031
|
||||
for (int d = 0; d < 16; d++) {
|
||||
int core_k = d / 8;
|
||||
int local_c = d % 8;
|
||||
int idx = core_k * 16 * 64 + local_c; // tile_mn=0, local_r=0
|
||||
s_out[160 + d] = bf16_to_f32(sQ[idx]);
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
if (tid == 0) {
|
||||
umma_ss_f16(tmem_base, desc_q, desc_k, idesc, /*accumulate=*/false);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user