diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 19ee9666..043bfbf1 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -366,7 +366,13 @@ class FmhaKernel: pattern_val = Float32(linear_idx) p_val_bf16 = pattern_val.to(self.q_dtype) # Original: p_val_bf16 = tTMEM_LOADrS_frg[k, j].to(self.q_dtype) - sP[pv_coord] = p_val_bf16 + + # Try both tensor indexing AND manual offset for debugging + sP[pv_coord] = p_val_bf16 # Tensor indexing + + # Also compute manual offset to verify + # offset = cute.crd2idx(pv_coord, sP.layout) + # (sP.iterator + offset) = p_val_bf16 row_sum = row_sum + tTMEM_LOADrS_frg[k, j] s_vec = tTMEM_LOADrS_frg[None, j].load()