diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 17b7df14..3abd839f 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -438,7 +438,7 @@ class FmhaKernel: cute.arch.fence_view_async_tmem_store() # TMA store via CUTLASS epilogue_tma_store - tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tOtO.layout) + tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tCtO_fake.layout) c_grp = pipeline.CooperativeGroup(pipeline.Agent.Thread, 32 * len(self.epilogue_warp_id)) c_pipe = pipeline.PipelineTmaStore.create(num_stages=self.num_c_stage, producer_group=c_grp) acc_cons_st = pipeline.make_pipeline_state(