Slice bSG_gC with mma_tile_coord (like epilogue_tma_store)

This commit is contained in:
2026-05-27 05:00:04 +00:00
parent eb0bf0cce0
commit b0ebf41ee3

View File

@@ -561,6 +561,8 @@ class FmhaKernel:
cute.group_modes(sC, 0, 2),
cute.group_modes(tCgC_epi, 0, 2),
)
# Slice bSG_gC to a single MMA tile (as epilogue_tma_store does); the tile coordinates are hard-coded to (0, 0, 0) here
bSG_gC = bSG_gC[(None, None, None, Int32(0), Int32(0), Int32(0))]
# TMA store: only the first epilogue warp does the copy
c_pipe = pipeline.PipelineTmaStore.create(
num_stages=self.num_c_stage,