From 4e593f1cad30341fe7a2be231befc6518196114e Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 24 May 2026 02:41:32 +0000 Subject: [PATCH] D1.5: Use tCtO_fake layout for epilogue_tma_store (needs STAGE dim) --- dsv4/kernels/attention/fmha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 17b7df14..3abd839f 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -438,7 +438,7 @@ class FmhaKernel: cute.arch.fence_view_async_tmem_store() # TMA store via CUTLASS epilogue_tma_store - tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tOtO.layout) + tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tCtO_fake.layout) c_grp = pipeline.CooperativeGroup(pipeline.Agent.Thread, 32 * len(self.epilogue_warp_id)) c_pipe = pipeline.PipelineTmaStore.create(num_stages=self.num_c_stage, producer_group=c_grp) acc_cons_st = pipeline.make_pipeline_state(