Fix gO slice: use separate Int32(0) instead of tuple

This commit is contained in:
2026-05-27 05:25:33 +00:00
parent bf36979a8d
commit 4a2a06f9e1

View File

@@ -626,7 +626,7 @@ class FmhaKernel:
gO_qdl = cute.flat_divide(
mC, cute.select(self.pv_mma_tiler, mode=[0, 1])
)
-gO = gO_qdl[None, None, None, Int32(0), (Int32(0), Int32(0))]
+gO = gO_qdl[None, None, None, Int32(0), Int32(0)]
tOsO, tOgO = cpasync.tma_partition(
tma_c, 0, cute.make_layout(1),
cute.group_modes(sC, 0, 2),