diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py
index 36140616..93351c85 100644
--- a/dsv4/kernels/attention/fmha.py
+++ b/dsv4/kernels/attention/fmha.py
@@ -181,7 +181,10 @@ class FmhaKernel:
         # For SMEM-P, offset is 0 (P not in TMEM).
         # Must be defined unconditionally (CuTeDSL scoping).
         _p0_bf16_offset = max(self.tmem_p0_offset, 0) * (32 // 16)  # Python int
-        tOrP0 = cute.make_tensor(tOrP.iterator + _p0_bf16_offset, tOrP.layout)
+        if _p0_bf16_offset > 0:
+            tOrP0 = cute.make_tensor(tOrP.iterator + _p0_bf16_offset, tOrP.layout)
+        else:
+            tOrP0 = tOrP
 
         tCtO_fake = pv_mma.make_fragment_C(cute.append(pv_as, self.num_acc_stage))
         pipeline.pipeline_init_wait(cluster_shape_mn=cl_vmnk)