From ea687980af7dd4e23c6835c8f5408239101fd5ea Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 22 May 2026 10:11:55 +0000 Subject: [PATCH] fix: epilogue warp self-signals acc_pipe producer before consuming --- tests/unit/test_fmha_v3_stage_c2.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_fmha_v3_stage_c2.py b/tests/unit/test_fmha_v3_stage_c2.py index 36d66b44..6868be2a 100644 --- a/tests/unit/test_fmha_v3_stage_c2.py +++ b/tests/unit/test_fmha_v3_stage_c2.py @@ -410,6 +410,11 @@ class FmhaV3StageC2: tmem.wait_for_alloc() tmem_ptr = tmem.retrieve_ptr(self.qk_acc_dtype) epi_handle = corr_epi_cons.wait_and_advance() + # Signal acc_pipe that O is ready (correction already normalized in TMEM) + acc_prod_st = pipeline.make_pipeline_state(pipeline.PipelineUserType.Producer, 1) + acc_pipe.producer_acquire(acc_prod_st) + acc_pipe.producer_commit(acc_prod_st); acc_prod_st.advance() + acc_pipe.producer_tail(acc_prod_st) # Write O from TMEM to GMEM via epilogue_tma_store tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tCtO_fake.layout) acc_cons_st = pipeline.make_pipeline_state(pipeline.PipelineUserType.Consumer, 1)