Test: use Python range() instead of cutlass.range() for the TMA loop
This commit is contained in:
@@ -225,7 +225,7 @@ class FmhaV3StageCMulti:
|
||||
qp.tail()
|
||||
kvp.reset()
|
||||
kv_coord = n_kv_tiles - n_kv_tiles # SSA runtime zero
|
||||
for kt in cutlass.range(0, n_kv_tiles, 1, unroll=1):
|
||||
for kt in range(n_kv_tiles):
|
||||
kvh = kvp.acquire_and_advance()
|
||||
cute.copy(tma_k, tBgK[(None, kv_coord)], tBsK[(None, kvh.index)], tma_bar_ptr=kvh.barrier)
|
||||
cute.copy(tma_v, tVgV[(None, kv_coord)], tVsV[(None, kvh.index)], tma_bar_ptr=kvh.barrier)
|
||||
|
||||
Reference in New Issue
Block a user