From 01621e152014a867cc008afbca0dbef0eea90ecc Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 22 May 2026 17:57:58 +0000 Subject: [PATCH] Diag: try runtime Int32(0+0) for kv_coord with cutlass.range --- tests/unit/test_fmha_v3_diag.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_fmha_v3_diag.py b/tests/unit/test_fmha_v3_diag.py index 532f5e44..e2394469 100644 --- a/tests/unit/test_fmha_v3_diag.py +++ b/tests/unit/test_fmha_v3_diag.py @@ -167,11 +167,13 @@ class FmhaV3Diag: cute.copy(tma_q, tAgQ[(None, Int32(0))], tAsQ[(None, qh.index)], tma_bar_ptr=qh.barrier) qp.tail() kvp.reset(); pk = kvp.try_acquire() - for kt in range(self.n_kv_tiles): - coord = Int32(kt) + # Force runtime Int32 (not literal) — option 3 from CUTLASS LLM + kv_coord = Int32(0 + 0) + for kt in cutlass.range(self.n_kv_tiles, unroll=1): kvh = kvp.acquire_and_advance(pk) - cute.copy(tma_k, tBgK[(None, coord)], tBsK[(None, kvh.index)], tma_bar_ptr=kvh.barrier) - cute.copy(tma_v, tVgV[(None, coord)], tVsV[(None, kvh.index)], tma_bar_ptr=kvh.barrier) + cute.copy(tma_k, tBgK[(None, kv_coord)], tBsK[(None, kvh.index)], tma_bar_ptr=kvh.barrier) + cute.copy(tma_v, tVgV[(None, kv_coord)], tVsV[(None, kvh.index)], tma_bar_ptr=kvh.barrier) + kv_coord += 1 pk = cutlass.Boolean(1) kvp.tail()