diag: properly disable O rescale; test n=128+256 baseline

This commit is contained in:
2026-05-23 01:12:50 +00:00
parent 0ef41266de
commit 9bcddb68e1

View File

@@ -375,10 +375,8 @@ class FmhaV3StageCMulti:
cute.arch.fence_view_async_tmem_store()
# === Per-tile O rescale: O *= acc_scale for kt > 0 ===
# TEMP: disabled for diagnosis — O rescale not applied
if kt > 0:
pass # O rescale disabled
if kt > 0:
# DIAG: O rescale DISABLED — testing baseline without rescale
if False:
for i in range(n_corr_tiles):
tTMEM_LOADtO_i = cute.make_tensor(
tTMEM_LOADtO.iterator + i * corr_tile_size,