Debug: run only a single PV K-tile (loop bound NKT_PV -> 1)

This commit is contained in:
2026-05-28 14:25:47 +00:00
parent 8cb32cabc9
commit 11da4daa01

View File

@@ -126,7 +126,7 @@ test_fmha_smem_p(const bf16_t* __restrict__ q, const bf16_t* __restrict__ k,
// Starting at offset kt * 2 * 1024 = kt * 2048 BF16
{
uint32_t idesc_pv = make_idesc(BLOCK_MN, HD);
-for (int kt = 0; kt < NKT_PV; kt++) {
+for (int kt = 0; kt < 1; kt++) {
bf16_t* sp = sP + kt * 2048;
bf16_t* sv = sV + kt * 256;
uint64_t dp = make_umma_desc_kmajor_none(__cvta_generic_to_shared(sp), BLOCK_MN);