SMEM-P: current state - working but mapping wrong (cos 0.02)

This commit is contained in:
2026-05-23 19:53:25 +00:00
parent 3d044b4747
commit 01fe51b175

View File

@@ -366,7 +366,13 @@ class FmhaKernel:
pattern_val = Float32(linear_idx)
p_val_bf16 = pattern_val.to(self.q_dtype)
# Original: p_val_bf16 = tTMEM_LOADrS_frg[k, j].to(self.q_dtype)
sP[pv_coord] = p_val_bf16
# Debugging: store via tensor indexing; the manual-offset variant below is left commented out
sP[pv_coord] = p_val_bf16 # Tensor indexing
# Manual-offset cross-check (disabled; the store on the second line is pseudocode, not valid Python):
# offset = cute.crd2idx(pv_coord, sP.layout)
# (sP.iterator + offset) = p_val_bf16
row_sum = row_sum + tTMEM_LOADrS_frg[k, j]
s_vec = tTMEM_LOADrS_frg[None, j].load()