D1.5: Use tCtO_fake layout for epilogue_tma_store (needs STAGE dim)
This commit is contained in:
@@ -438,7 +438,7 @@ class FmhaKernel:
 cute.arch.fence_view_async_tmem_store()
 
 # TMA store via CUTLASS epilogue_tma_store
-tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tOtO.layout)
+tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tCtO_fake.layout)
 c_grp = pipeline.CooperativeGroup(pipeline.Agent.Thread, 32 * len(self.epilogue_warp_id))
 c_pipe = pipeline.PipelineTmaStore.create(num_stages=self.num_c_stage, producer_group=c_grp)
 acc_cons_st = pipeline.make_pipeline_state(
Reference in New Issue
Block a user