test: verify SMEM Q layout by reading back canonical data

This commit is contained in:
2026-05-28 09:35:58 +00:00
parent 004046a6a8
commit 8f23c2aaf6

View File

@@ -70,7 +70,19 @@ test_umma_qk_hd16(
}
__syncthreads();
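// Before the tcgen05.mma SS call below, check the SMEM layout: thread 0 copies
// Q row 0 from sQ into s_out[160..175] so it can be compared with the original
// (the comparison itself is not performed here).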
if (tid == 0) {
// Q row 0 (tile_mn=0, local_r=0) is in canonical layout at the following
// element indices of sQ:
// core(0,0): d=0..7,  offset 0,          local_c=0..7 → indices 0..7
// core(0,1): d=8..15, offset 16*64=1024, local_c=0..7 → indices 1024..1031
for (int d = 0; d < 16; d++) {
int core_k = d / 8;
int local_c = d % 8;
int idx = core_k * 16 * 64 + local_c; // tile_mn=0, local_r=0
s_out[160 + d] = bf16_to_f32(sQ[idx]);
}
}
__syncthreads();
if (tid == 0) {
umma_ss_f16(tmem_base, desc_q, desc_k, idesc, /*accumulate=*/false);
}