From a5061a24b9db0a2eef00561fe0e68fc29ba0fcee Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 23 May 2026 21:13:52 +0000 Subject: [PATCH] D5a: Use tensor indexing for LSE write --- dsv4/kernels/attention/fmha.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 1cdab662..9ed1e596 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -453,7 +453,8 @@ class FmhaKernel: _row_max_safe = Float32(0.0) if sfw_idx == 0: lse_val = cute.math.log(row_sum, fastmath=True) + _row_max_safe - cute.store(lse_val, mLSE[None, None, 0]) + # Write LSE to GMEM: mLSE is a (1,1,1) FP32 tensor + mLSE[0, 0, 0] = lse_val tmem.relinquish_alloc_permit() tmem.free(tmem_ptr)