test: zero only 32 TMEM columns

This commit is contained in:
2026-05-28 09:25:05 +00:00
parent 2b1c8ce7df
commit 3f5dcd481e

View File

@@ -71,9 +71,10 @@ test_umma_qk_hd16(
__syncthreads();
uint32_t tmem_base = *sTmemBase;
// Zero TMEM
// Zero TMEM — test: zero only the first 32 columns (the minimum power-of-2 column count)
// Note: zeroing all 128 columns might be too many tmem_store calls to issue in one loop
if (wid == 0) {
for (int col = 0; col < 128; col++) {
for (int col = 0; col < 32; col++) {
tmem_store(tmem_base + col, 0, 0, 0, 0);
}
tmem_fence_store();