From b0ebf41ee3607669afaa0ee746016d5d6f1e8dda Mon Sep 17 00:00:00 2001 From: biondizzle Date: Wed, 27 May 2026 05:00:04 +0000 Subject: [PATCH] Slice bSG_gC with mma_tile_coord (like epilogue_tma_store) --- dsv4/kernels/attention/fmha_smem_acc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dsv4/kernels/attention/fmha_smem_acc.py b/dsv4/kernels/attention/fmha_smem_acc.py index f02eaa6a..4b24d492 100644 --- a/dsv4/kernels/attention/fmha_smem_acc.py +++ b/dsv4/kernels/attention/fmha_smem_acc.py @@ -561,6 +561,8 @@ class FmhaKernel: cute.group_modes(sC, 0, 2), cute.group_modes(tCgC_epi, 0, 2), ) + # Slice bSG_gC at fixed trailing indices (0, 0, 0), keeping the three leading modes. NOTE(review): epilogue_tma_store presumably indexes with mma_tile_coord here; confirm the hard-coded zeros are intended. + bSG_gC = bSG_gC[(None, None, None, Int32(0), Int32(0), Int32(0))] # TMA store: only the first epilogue warp does the copy c_pipe = pipeline.PipelineTmaStore.create( num_stages=self.num_c_stage,