From 553275d81025e1832349e336007e0419c485ae2c Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Tue, 2 Jun 2026 08:25:52 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20P1=20=E2=80=94=20add=20eager=20warmup?=
 =?UTF-8?q?=5Ffused=5Fswiglu=5Fcompilation=20for=20SharedExpert=20(1-group?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 single_shot_inference.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/single_shot_inference.py b/single_shot_inference.py
index d0c936e5..82afad7f 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -1042,6 +1042,15 @@ def main():
         se.set_fused_swiglu(True)
         # EAGERLY process shared expert weights
         se._ensure_initialized()
+        # P1: Eagerly warm up fused SwiGLU compilation for the SharedExpert (SE, 1 group)
+        if se._fused_swiglu:
+            from dsv4.ops.gemm_runner import warmup_fused_swiglu_compilation
+            K_packed = H // 2
+            N_packed_l1 = (2 * cfg.get("moe_intermediate_size", 3072)) // 2  # 2x for gate+up; //2 presumably the same packing as K_packed (confirm)
+            warmup_fused_swiglu_compilation(
+                1, K_packed, N_packed_l1, dev,
+                swiglu_limit=cfg.get("swiglu_limit", 10.0),
+            )
         # Fix activation global scales — _ensure_initialized sets gsa from l1_gs (which is 1.0)
         # FIX: Same runtime gsa for SharedExpert
         se._use_runtime_gsa = True