diff --git a/dsv4/kernels/attention/fmha_smem_acc.py b/dsv4/kernels/attention/fmha_smem_acc.py index 4b24d492..bd14b24f 100644 --- a/dsv4/kernels/attention/fmha_smem_acc.py +++ b/dsv4/kernels/attention/fmha_smem_acc.py @@ -570,7 +570,7 @@ class FmhaKernel: ) c_pipe.producer_acquire() if warp_idx == self.epilogue_warp_id[0]: - cute.copy(tma_c, bSG_sC[(None, Int32(0))], bSG_gC[(None, Int32(0))]) + cute.copy(tma_c, bSG_sC[(None, None, Int32(0))], bSG_gC[(None, None, Int32(0))]) c_pipe.producer_commit() c_pipe.producer_acquire() epilog_sync_barrier.arrive_and_wait()