From 18f3274c0bc24c425cdb1121ffaa0ee3d63e74bc Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 24 May 2026 22:19:16 +0000 Subject: [PATCH] D1: DEBUG - NO-OP O rescale (multiply by 1.0) to test TMEM round-trip --- dsv4/kernels/attention/fmha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index f42693d4..1580525f 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -465,7 +465,7 @@ class FmhaKernel: ) cute.copy(tiled_tmem_load_o, tTMEM_LOADtO_i, tTMrO_i) for k in cutlass.range(cute.size(tTMrO_i), vectorize=True): - tTMrO_i[k] = tTMrO_i[k] * acc_scale + tTMrO_i[k] = tTMrO_i[k] * Float32(1.0) # DEBUG: NO-OP round-trip test cute.copy(tiled_tmem_store_o, tTMrO_i, tTMEM_STOREtO_i) cute.arch.fence_view_async_tmem_store()