debug: skip TMEM alloc — test SMEM loads only

This commit is contained in:
2026-05-28 08:49:37 +00:00
parent 72c97f2546
commit a04d794979

View File

@@ -48,6 +48,9 @@ fmha_qk_verify(
}
__syncthreads();
// DEBUG: skip TMEM entirely — exercise only the SMEM loads and the scalar QK path.
// The TMEM allocation is commented out and no MMA is issued.
/*
// TMEM alloc for S: 128 columns
if (wid == 0) {
uint32_t smem_ptr = __cvta_generic_to_shared(sTmemBase);
@@ -55,6 +58,8 @@ fmha_qk_verify(
}
__syncthreads();
uint32_t tmem_base = *sTmemBase;
*/
uint32_t tmem_base = 0; // dummy
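// Zero TMEM S
// NOTE(review): TMEM allocation is disabled above and tmem_base is a dummy 0;
// confirm this zeroing step is also skipped or is safe without an allocation.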
if (wid == 0) {