test: revert to 64-bit descriptors, 4 warp leaders, 32x32b read

This commit is contained in:
2026-05-28 10:21:06 +00:00
parent 32f7fa7bce
commit 57d67e6b51

View File

@@ -59,9 +59,7 @@ test_umma_qk_hd16(const bf16_t* q, const bf16_t* k,
uint64_t desc_k = make_umma_desc_kmajor_none(sK_smem, 32);
uint32_t idesc = make_idesc(128, 128);
// MMA — 4 warp leaders call the instruction (Layout D requires 4 warps)
// elect_one_sync selects 1 leader per warp. With 4 warps, 4 leaders call MMA.
int elect_one = __ballot_sync(0xFFFFFFFF, lane == 0);
// MMA — 4 warp leaders
if (lane == 0 && wid < 4) {
umma_ss_f16(tb, desc_q, desc_k, idesc, false);
}