Fix gO slice: use separate Int32(0) instead of tuple
This commit is contained in:
@@ -626,7 +626,7 @@ class FmhaKernel:
|
||||
gO_qdl = cute.flat_divide(
|
||||
mC, cute.select(self.pv_mma_tiler, mode=[0, 1])
|
||||
)
|
||||
- gO = gO_qdl[None, None, None, Int32(0), (Int32(0), Int32(0))]
|
||||
+ gO = gO_qdl[None, None, None, Int32(0), Int32(0)]
|
||||
tOsO, tOgO = cpasync.tma_partition(
|
||||
tma_c, 0, cute.make_layout(1),
|
||||
cute.group_modes(sC, 0, 2),
|
||||
|
||||
Reference in New Issue
Block a user