SMEM-P: add iterator offset debug print

This commit is contained in:
2026-05-23 20:19:22 +00:00
parent 42880fa397
commit c0e2fe43f1

View File

@@ -186,6 +186,12 @@ class FmhaKernel:
tCrP = pv_mma.make_fragment_A(sP)
if self.use_smem_p:
print(f"[SMEM-P DEBUG] tCrP shape: {cute.shape(tCrP)} layout: {tCrP.layout}")
# DEBUG: compute iterator offset between tCrP and sP
try:
offset_elems = tCrP.iterator - sP.iterator
print(f"[SMEM-P DEBUG] tCrP iterator offset: {offset_elems}")
except:
print(f"[SMEM-P DEBUG] iterator offset not available")
# tOrP0 is always defined as tOrP. The TMEM-P path in the MMA warp applies
# the p0 column offset inline when constructing the gemm arguments.
tOrP0 = tOrP