From 2af767a90c2a6ea250e60e4d3a7b096e4d8aa4fc Mon Sep 17 00:00:00 2001 From: biondizzle Date: Wed, 27 May 2026 05:28:43 +0000 Subject: [PATCH] Try full tensor TMA copy without slicing --- dsv4/kernels/attention/fmha_smem_acc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha_smem_acc.py b/dsv4/kernels/attention/fmha_smem_acc.py index 15ec7b42..9e52d7b7 100644 --- a/dsv4/kernels/attention/fmha_smem_acc.py +++ b/dsv4/kernels/attention/fmha_smem_acc.py @@ -634,7 +634,7 @@ class FmhaKernel: cute.group_modes(gO, 0, 2), ) if warp_idx == self.epilogue_warp_id[0]: - cute.copy(tma_c, tOsC[None, Int32(0)], tOgO[None, Int32(0)]) + cute.copy(tma_c, tOsC, tOgO) cute.arch.cp_async_bulk_commit_group() cute.arch.cp_async_bulk_wait_group(0, read=True)