SMEM-P: current state - working, but the mapping is wrong (cos 0.02)
This commit is contained in:
@@ -366,7 +366,13 @@ class FmhaKernel:
|
||||
pattern_val = Float32(linear_idx)
|
||||
p_val_bf16 = pattern_val.to(self.q_dtype)
|
||||
# Original: p_val_bf16 = tTMEM_LOADrS_frg[k, j].to(self.q_dtype)
|
||||
sP[pv_coord] = p_val_bf16
|
||||
|
||||
# Debugging: store via tensor indexing; the manual-offset variant below is kept commented out for comparison
|
||||
sP[pv_coord] = p_val_bf16 # Tensor indexing
|
||||
|
||||
# Manual offset computation to verify the mapping (currently disabled)
|
||||
# offset = cute.crd2idx(pv_coord, sP.layout)
|
||||
# (sP.iterator + offset) = p_val_bf16
|
||||
|
||||
row_sum = row_sum + tTMEM_LOADrS_frg[k, j]
|
||||
s_vec = tTMEM_LOADrS_frg[None, j].load()
|
||||
|
||||
Reference in New Issue
Block a user