D1.3: Fix while loop in cotiled diag - precompute num_tmem_alloc_cols
This commit is contained in:
@@ -82,9 +82,10 @@ class SmemPDiag:
         s_cols = self.qk_mma_tiler[1]
         o_cols = find_tmem_tensor_col_offset(tOtO)
         total = max(s_cols, o_cols)
-        self.num_tmem_alloc_cols = 1
-        while self.num_tmem_alloc_cols < total:
-            self.num_tmem_alloc_cols *= 2
+        _n = 1
+        while _n < total:
+            _n *= 2
+        self.num_tmem_alloc_cols = _n
         self.tOrP0_offset = 0 # SMEM-P
 
         # Build SMEM layouts
Reference in New Issue
Block a user