fix: epilogue warp self-signals acc_pipe producer before consuming
This commit is contained in:
@@ -410,6 +410,11 @@ class FmhaV3StageC2:
|
||||
tmem.wait_for_alloc()
|
||||
tmem_ptr = tmem.retrieve_ptr(self.qk_acc_dtype)
|
||||
epi_handle = corr_epi_cons.wait_and_advance()
|
||||
# Signal acc_pipe that O is ready (the correction stage has already normalized O in TMEM)
|
||||
acc_prod_st = pipeline.make_pipeline_state(pipeline.PipelineUserType.Producer, 1)
|
||||
acc_pipe.producer_acquire(acc_prod_st)
|
||||
acc_pipe.producer_commit(acc_prod_st); acc_prod_st.advance()
|
||||
acc_pipe.producer_tail(acc_prod_st)
|
||||
# Write O from TMEM to GMEM via epilogue_tma_store
|
||||
tCtO_base = cute.make_tensor(tmem_ptr + self.tmem_o0_offset, tCtO_fake.layout)
|
||||
acc_cons_st = pipeline.make_pipeline_state(pipeline.PipelineUserType.Consumer, 1)
|
||||
|
||||
Reference in New Issue
Block a user