D1.3: Fix while loop in cotiled diag - precompute num_tmem_alloc_cols

This commit is contained in:
2026-05-24 00:07:22 +00:00
parent d9f3fcd71d
commit 571fc43f57

View File

@@ -82,9 +82,10 @@ class SmemPDiag:
s_cols = self.qk_mma_tiler[1]
o_cols = find_tmem_tensor_col_offset(tOtO)
total = max(s_cols, o_cols)
-self.num_tmem_alloc_cols = 1
-while self.num_tmem_alloc_cols < total:
-    self.num_tmem_alloc_cols *= 2
+_n = 1
+while _n < total:
+    _n *= 2
+self.num_tmem_alloc_cols = _n
self.tOrP0_offset = 0 # SMEM-P
# Build SMEM layouts