auto: pre-test commit

This commit is contained in:
2026-05-23 00:29:49 +00:00
parent 17f80de485
commit fc5b6811d2

View File

@@ -323,6 +323,8 @@ class FmhaV3StageCMulti:
# the missing rescale shows as accuracy drift.
for kt in range(n_kv_tiles):
si_handle = s_cons.wait_and_advance()
if kt == 0:
cute.printf("SOFTMAX n_kv_tiles=%d\n", Int32(n_kv_tiles))
# Load S[kt]
tTMEM_LOADrS = cute.make_rmem_tensor(tTMEM_LOADcS.shape, self.qk_acc_dtype)