fix: use cute.copy instead of cpasync.copy for TMA store
This commit is contained in:
@@ -397,7 +397,7 @@ class FmhaV3StageCMulti:
|
||||
num_threads=32 * len(self.epilogue_warp_id),
|
||||
)
|
||||
epi_bar.arrive_and_wait()
|
||||
-cpasync.copy(tma_c, cute.select(sC, mode=[0, 1]), gC)
|
||||
+cute.copy(tma_c, cute.select(sC, mode=[0, 1]), gC)
|
||||
cute.arch.cp_async_bulk_commit_group()
|
||||
cute.arch.cp_async_bulk_wait_group(0, read=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user