From a90fe41b6b5e48acf76d58c8fef6804639030e31 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 23 May 2026 22:21:31 +0000 Subject: [PATCH] D1.3: Skip fragment creation in diagnostic, just print layouts --- tests/unit/test_d1_3_layout_diag.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/unit/test_d1_3_layout_diag.py b/tests/unit/test_d1_3_layout_diag.py index 0405d468..756d0ff6 100644 --- a/tests/unit/test_d1_3_layout_diag.py +++ b/tests/unit/test_d1_3_layout_diag.py @@ -91,22 +91,16 @@ def main(): print(f" sP_2d layout: {sP_2d.layout}") print(f" sP_2d layout: {sP_2d.layout}") - # PV A-operand fragments - tP_tmem = cute.make_tensor(0, p_smem_s.outer) # dummy ptr - tP_smem = cute.make_tensor(0, p_smem_s.outer) - tOrP_tmem = pv_thr.make_fragment_A(tP_tmem) - tCrP_smem = pv_mma.make_fragment_A(tP_smem) - tOrP_smem = pv_thr.make_fragment_A(tP_smem) - print(f" tOrP_tmem shape: {cute.shape(tOrP_tmem)}") - print(f" tCrP_smem shape: {cute.shape(tCrP_smem)} layout: {tCrP_smem.layout}") - print(f" tOrP_smem shape: {cute.shape(tOrP_smem)}") - - # Slice as kernel does - tOrP_tmem_s = tOrP_tmem[(None,None,None,0)] - tOrP_smem_s = tOrP_smem[(None,None,None,0)] - print(f" tOrP_tmem sliced: {cute.shape(tOrP_tmem_s)}") - print(f" tOrP_smem sliced: {cute.shape(tOrP_smem_s)}") - + # PV A-operand fragments — can't create with dummy ptr 0 (no memspace) + # Just print the layout info from the SMEM layout + print(f" p_smem_s.outer (sP layout): {p_smem_s.outer}") + print(f" p_smem_s.inner (swizzle): {p_smem_s.inner}") + + # What we need: the PV MMA's A-operand thread partition for sP + # This tells us which threads read which sP elements during PV GEMM + # The softmax warps must WRITE to sP using the same mapping + # (so that the MMA warp can READ using its own partition) + # Softmax TMEM load partition sfw_idx = 0 tmem_load_atom = cute.make_copy_atom(tcgen05.copy.Ld32x32bOp(tcgen05.copy.Repetition(32)), Float32)