From 25aeaca9ab52bbd20efe39f55e4fcc14f072cc88 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 30 May 2026 06:56:53 +0000 Subject: [PATCH] fix: PV accumulate flag --- dsv4/kernels/attention/fmha_6warp_tma_multirow_multitile.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dsv4/kernels/attention/fmha_6warp_tma_multirow_multitile.cuh b/dsv4/kernels/attention/fmha_6warp_tma_multirow_multitile.cuh index e75025ff..f5fc8b18 100644 --- a/dsv4/kernels/attention/fmha_6warp_tma_multirow_multitile.cuh +++ b/dsv4/kernels/attention/fmha_6warp_tma_multirow_multitile.cuh @@ -278,8 +278,7 @@ fmha_6warp_tma_multirow_multitile_kernel(FmhaTmaMultiRowMultiTileParams params) uint64_t dv = make_umma_desc_kmajor_none(__cvta_generic_to_shared(sV), 16); // TMEM column offset: (n_sub - n_sub_start) * 16 int tmem_col = (n_sub - n_sub_start) * 16; - bool accumulate = (pv_kt > 0) || (n_sub > n_sub_start); - if (tid == 128) umma_ss_f16(tb + tmem_col, dp, dv, idesc_pv, accumulate); + if (tid == 128) umma_ss_f16(tb + tmem_col, dp, dv, idesc_pv, pv_kt > 0); asm volatile("tcgen05.fence::after_thread_sync;" ::: "memory"); } __syncthreads();