auto: pre-test commit
This commit is contained in:
@@ -323,6 +323,7 @@ class FmhaV3StageCMulti:
|
||||
# Per-tile softmax loop with online rescale.
|
||||
for kt in range(n_kv_tiles):
|
||||
si_handle = s_cons.wait_and_advance()
|
||||
cute.printf("SOFTMAX kt=%d row_max_before=%f row_sum_before=%f\n", kt, row_max, row_sum)
|
||||
|
||||
# Load S[kt]
|
||||
tTMEM_LOADrS = cute.make_rmem_tensor(tTMEM_LOADcS.shape, self.qk_acc_dtype)
|
||||
@@ -397,6 +398,7 @@ class FmhaV3StageCMulti:
|
||||
cute.arch.fence_view_async_tmem_store()
|
||||
|
||||
si_handle.release()
|
||||
cute.printf("SOFTMAX kt=%d row_max_after=%f row_sum_after=%f\n", kt, row_max, row_sum)
|
||||
softmax_done_bar.arrive()
|
||||
|
||||
# Wait for MMA's PV[N-1] to commit before reading O for normalize.
|
||||
|
||||
Reference in New Issue
Block a user