diag: test original code n=128+256 to confirm baseline
This commit is contained in:
@@ -375,8 +375,7 @@ class FmhaV3StageCMulti:
|
||||
cute.arch.fence_view_async_tmem_store()
|
||||
|
||||
# === Per-tile O rescale: O *= acc_scale for kt > 0 ===
|
||||
# DIAG: O rescale DISABLED — testing baseline without rescale
|
||||
if False:
|
||||
if kt > 0:
|
||||
for i in range(n_corr_tiles):
|
||||
tTMEM_LOADtO_i = cute.make_tensor(
|
||||
tTMEM_LOADtO.iterator + i * corr_tile_size,
|
||||
|
||||
Reference in New Issue
Block a user