From d0a50f1f2ebf1dd1cae179d2512fa47ccfa8b6b3 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 29 May 2026 19:36:41 +0000
Subject: [PATCH] fix: remove double normalization in TMA epilogue (P already normalized before PV)

---
 dsv4/kernels/attention/fmha_6warp_tma.cuh | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dsv4/kernels/attention/fmha_6warp_tma.cuh b/dsv4/kernels/attention/fmha_6warp_tma.cuh
index 82874175..df3f5d69 100644
--- a/dsv4/kernels/attention/fmha_6warp_tma.cuh
+++ b/dsv4/kernels/attention/fmha_6warp_tma.cuh
@@ -255,10 +255,8 @@ fmha_6warp_tma_kernel(
 asm volatile("tcgen05.wait::ld.sync.aligned;");
 if (lane == 0) for (int c=0;c<8;c++) o_vals[n*8+c] = tmp[c];
 }
- float row_sum = *sRowSum;
- float inv_rs = 1.0f / row_sum;
- if (lane == 0) for (int d=0;d