From 04dd7545b3cb7ce043a3591a520ee7b81006f48e Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Sun, 31 May 2026 04:51:16 +0000
Subject: [PATCH] switch to production FMHA for full run

---
 single_shot_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/single_shot_inference.py b/single_shot_inference.py
index 6f244374..7e00051d 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -369,7 +369,7 @@ def forward_layer(X_l, w, li, cfg, rope_cos, rope_sin,
     # -- FMHA: (n_h, T, hd) × (1, seq_len, hd) → (n_h, T, hd) --
     q_input = q_heads.permute(1, 0, 2)  # (n_h, T, hd)
     # Use PyTorch SDPA for correctness verification
-    USE_SDPA = True  # Use SDPA with sinks for correctness
+    USE_SDPA = False  # Use production FMHA kernel (better residual, no sinks)
     if USE_SDPA:
         # Expand K/V for GQA: (1, seq_len, hd) → (n_h, seq_len, hd)
         k_expanded = k_full.expand(n_h, -1, -1).contiguous()  # (n_h, seq_len, hd)