diff --git a/dsv4/kernels/attention/fmha_epilogue_sm100.cuh b/dsv4/kernels/attention/fmha_epilogue_sm100.cuh index 988bcfba..fd44c954 100644 --- a/dsv4/kernels/attention/fmha_epilogue_sm100.cuh +++ b/dsv4/kernels/attention/fmha_epilogue_sm100.cuh @@ -207,7 +207,7 @@ fmha_decode_tmem( sRowSums[0] = row_sum; } __syncthreads(); - if (tid == 0) printf("[tmem] attention computed, row_sum=%f, max=%f\n", sRowSums[0], sPvBuf[0]); + if (tid == 0) printf("[tmem] attention computed, row_sum=%f, sPvBuf[0]=%f, sPvBuf[32]=%f\n", sRowSums[0], sPvBuf[0], HD>32?sPvBuf[32]:0.0f); // ================================================================ // One-way Correction Epilogue: SMEM → TMEM → regs → normalize → GMEM