test: add SMEM verify for HD=64 K-tile offsets

This commit is contained in:
2026-05-28 11:49:44 +00:00
parent 4483539f01
commit 4fd41365de

View File

@@ -75,7 +75,16 @@ test_umma_qk_hd64(const bf16_t* q, const bf16_t* k,
}
__syncthreads();
// Construct base descriptors for Q and K
// Verify SMEM data for first K-tile (columns 0-15)
if (tid == 0) {
// Q row 0, d=0..7: core(0,0) at offset 0, local_r=0, local_c=d
for (int d = 0; d < 8; d++)
s_out[200+d] = bf16_to_f32(sQ[d]); // core(0,0), row 0, col d
// Q row 0, d=16..23: core(0,2) at offset 2*1024 = 2048, local_r=0, local_c=d-16
for (int d = 0; d < 8; d++)
s_out[208+d] = bf16_to_f32(sQ[2048 + d]); // core(0,2), row 0, col 0..7
}
__syncthreads();
uint32_t sQ_smem = __cvta_generic_to_shared(sQ);
uint32_t sK_smem = __cvta_generic_to_shared(sK);