From 215282971c54e7ebf760752872ff1b5bd40544a6 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 22 May 2026 20:34:21 +0000 Subject: [PATCH] DEBUG: hardcoded Int32(1) to test if TMA can read tile 1 --- tests/unit/test_fmha_v3_stage_c.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_fmha_v3_stage_c.py b/tests/unit/test_fmha_v3_stage_c.py index 5a1c63e3..4c79cab9 100644 --- a/tests/unit/test_fmha_v3_stage_c.py +++ b/tests/unit/test_fmha_v3_stage_c.py @@ -224,18 +224,11 @@ class FmhaV3StageCMulti: cute.copy(tma_q, tAgQ[(None, Int32(0))], tAsQ[(None, qh.index)], tma_bar_ptr=qh.barrier) qp.tail() kvp.reset() - # Try using the pipeline state count (kh.count) as the coordinate. - # This is what the CUTLASS reference's "mode 1" does — the pipeline - # index IS the GMEM tile index for 2-stage pipelines with 2 KV tiles. - # For more tiles, we need a separate counter. - # But first, let's test if the coordinate matters at all by using - # Int32(1) for the second tile when n=256. + # DEBUG: Use constant Int32(1) to test if TMA can read from tile 1 at all for kt in range(n_kv_tiles): kvh = kvp.acquire_and_advance() - # Use a hardcoded coord to test if TMA even reads different tiles - coord = Int32(kt) # Should be 0, 1, 2, ... but might be constant-folded - cute.copy(tma_k, tBgK[(None, coord)], tBsK[(None, kvh.index)], tma_bar_ptr=kvh.barrier) - cute.copy(tma_v, tVgV[(None, coord)], tVsV[(None, kvh.index)], tma_bar_ptr=kvh.barrier) + cute.copy(tma_k, tBgK[(None, Int32(1))], tBsK[(None, kvh.index)], tma_bar_ptr=kvh.barrier) + cute.copy(tma_v, tVgV[(None, Int32(1))], tVsV[(None, kvh.index)], tma_bar_ptr=kvh.barrier) kvp.tail() # ===== MMA warp =====