Add more debug prints for sP shapes

This commit is contained in:
2026-05-23 09:26:30 +00:00
parent 2283de1cfc
commit c9dda47971

View File

@@ -268,7 +268,9 @@ class FmhaKernel:
)
tiled_smem_copy = cute.make_tiled_copy_C(smem_copy_atom, qk_mma)
thr_smem_copy = tiled_smem_copy.get_slice(sfw_idx)
sP_2d = cute.group_modes(sP, 0, 3) # flatten to 2D for copy
print(f"[SMEM-P DEBUG] sP shape: {cute.shape(sP)}")
sP_2d = cute.group_modes(sP, 0, 3)
print(f"[SMEM-P DEBUG] sP_2d shape: {cute.shape(sP_2d)}") # flatten to 2D for copy
tSMEM_CPYsP = thr_smem_copy.partition_D(sP_2d) # destination (SMEM)
row_max = -Float32.inf