diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 61869bf6..87783e49 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -408,6 +408,12 @@ class FmhaKernel: # SMEM-P: Already wrote P values to SMEM in softmax loop # Just need fence and barrier print(f"[SMEM-P CUTLASS] P values already written to SMEM, proceeding to fence") + + # DEBUG: Compute offset for known coordinate to verify mapping + test_coord = ((0,0), 0, (0,0), 0) + test_offset = cute.crd2idx(test_coord, sP.layout) + print(f"[SMEM-P DEBUG] test_coord {test_coord} -> offset {test_offset}") + cute.arch.fence_proxy("async.shared", space="cta") # Barrier for both TMEM-P and SMEM-P paths