diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 46ba5611..d3ed7cbe 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -410,6 +410,7 @@ class FmhaKernel: final_o_bar.arrive_and_wait() # === NO-OP TMEM round-trip: re-map O from MMA layout to epilog layout === + # TODO: Replace with correction epilog (D1.5) for zero-error one-way trip tTMrO_noop = cute.make_rmem_tensor( (tTMEM_LOADcO.shape, 128 // corr_tile_size), self.acc_dtype )