debug: add TMEM read diagnostic

This commit is contained in:
2026-05-28 07:46:15 +00:00
parent a12607b0bd
commit b50f6a8512

View File

@@ -270,6 +270,9 @@ fmha_decode_tmem(
float r2 = u32_to_f32(u2) * inv_sum;
float r3 = u32_to_f32(u3) * inv_sum;
// Debug: print r0..r3 and inv_sum once, only where lane == 0 and col == 0
if (lane == 0 && col == 0) printf("[tmem] read: r0=%f r1=%f r2=%f r3=%f inv_sum=%f\n", r0, r1, r2, r3, inv_sum);
// Step 4: Cast to BF16 and write to GMEM
int base = col * 128;
int d0 = base + lane * 4 + 0;