From 34d64137ecd452ed0001d5f3d88ed06e65d03268 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 26 May 2026 20:29:34 +0000 Subject: [PATCH] D1.5 debug: force rescale_factor=0.5 to test if round-trip code executes --- dsv4/kernels/attention/fmha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 5cf57188..a0e81c94 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -558,7 +558,7 @@ class FmhaKernel: pv_done_bar.arrive_and_wait() # Wait for PV[kt-1] # Rescale O: load, multiply by acc_scale, store back to TMEM. # CUTLASS pattern: both copies use same tOtO_i (composition-tiled). - rescale_factor = acc_scale + rescale_factor = Float32(0.5) # DEBUG: force known value if const_expr(self.debug_noop_rescale): rescale_factor = Float32(1.0) n_slices = self.head_dim // corr_tile_size