From eadd870d80f98fa2c7e35fe4fca989b4d4fc1a66 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 23 May 2026 00:02:33 +0000 Subject: [PATCH] auto: pre-test commit --- tests/fmha_v3_stage_c_example9.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/fmha_v3_stage_c_example9.py b/tests/fmha_v3_stage_c_example9.py index d2f8a54b..ba1e6fa2 100644 --- a/tests/fmha_v3_stage_c_example9.py +++ b/tests/fmha_v3_stage_c_example9.py @@ -323,6 +323,7 @@ class FmhaV3StageCMulti: # Per-tile softmax loop with online rescale. for kt in range(n_kv_tiles): si_handle = s_cons.wait_and_advance() + cute.printf("SOFTMAX kt=%d row_max_before=%f row_sum_before=%f\n", kt, row_max, row_sum) # Load S[kt] tTMEM_LOADrS = cute.make_rmem_tensor(tTMEM_LOADcS.shape, self.qk_acc_dtype) @@ -397,6 +398,7 @@ class FmhaV3StageCMulti: cute.arch.fence_view_async_tmem_store() si_handle.release() + cute.printf("SOFTMAX kt=%d row_max_after=%f row_sum_after=%f\n", kt, row_max, row_sum) softmax_done_bar.arrive() # Wait for MMA's PV[N-1] to commit before reading O for normalize.