diag: test original code n=128+256 to confirm baseline

This commit is contained in:
2026-05-23 01:13:29 +00:00
parent 9bcddb68e1
commit 2b93b10199

View File

@@ -375,8 +375,7 @@ class FmhaV3StageCMulti:
cute.arch.fence_view_async_tmem_store()
# === Per-tile O rescale: O *= acc_scale for kt > 0 ===
# DIAG: O rescale DISABLED — testing baseline without rescale
if False:
if kt > 0:
for i in range(n_corr_tiles):
tTMEM_LOADtO_i = cute.make_tensor(
tTMEM_LOADtO.iterator + i * corr_tile_size,