debug: minimal UMMA descriptor (just start_addr + version)

This commit is contained in:
2026-05-28 08:48:01 +00:00
parent 9a51bfa578
commit 72c97f2546

View File

@@ -69,8 +69,14 @@ fmha_qk_verify(
uint32_t sQ_smem = __cvta_generic_to_shared(sQ);
uint32_t sK_smem = __cvta_generic_to_shared(sK);
uint64_t desc_q = make_umma_desc_mn_none(sQ_smem, HD);
uint64_t desc_k = make_umma_desc_k_none(sK_smem, HD);
uint64_t desc_q = 0;
desc_q |= (static_cast<uint64_t>(sQ_smem >> 4) & 0x3FFF); // start_address
desc_q |= (static_cast<uint64_t>(1) << 46); // version
// All remaining descriptor fields are left at 0 (no strides, no swizzle).
uint64_t desc_k = 0;
desc_k |= (static_cast<uint64_t>(sK_smem >> 4) & 0x3FFF);
desc_k |= (static_cast<uint64_t>(1) << 46);
// Quick test: verify that the SMEM data was loaded correctly.
// Write the scalar dot product of Q[0,0..3] and K[0,0..3] to s_out[0] as a sanity check.