auto: pre-test commit

This commit is contained in:
2026-05-23 00:02:33 +00:00
parent 79faa14cef
commit eadd870d80

View File

@@ -323,6 +323,7 @@ class FmhaV3StageCMulti:
# Per-tile softmax loop with online rescale.
for kt in range(n_kv_tiles):
si_handle = s_cons.wait_and_advance()
cute.printf("SOFTMAX kt=%d row_max_before=%f row_sum_before=%f\n", kt, row_max, row_sum)
# Load S[kt]
tTMEM_LOADrS = cute.make_rmem_tensor(tTMEM_LOADcS.shape, self.qk_acc_dtype)
@@ -397,6 +398,7 @@ class FmhaV3StageCMulti:
cute.arch.fence_view_async_tmem_store()
si_handle.release()
cute.printf("SOFTMAX kt=%d row_max_after=%f row_sum_after=%f\n", kt, row_max, row_sum)
softmax_done_bar.arrive()
# Wait for MMA's PV[N-1] to commit before reading O for normalize.