feat: P1 — add eager warmup_fused_swiglu_compilation for SharedExpert (1-group)

2026-06-02 08:25:52 +00:00
parent 5ed4c86137
commit 553275d810
1 changed files with 9 additions and 0 deletions
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -1042,6 +1042,15 @@ def main():
        se.set_fused_swiglu(True)
        # EAGERLY process shared expert weights
        se._ensure_initialized()
+        # P1: Eagerly warm up fused SwiGLU compilation for the SharedExpert (SE, 1-group)
+        if se._fused_swiglu:
+            from dsv4.ops.gemm_runner import warmup_fused_swiglu_compilation
+            K_packed = H // 2
+            N_packed_l1 = (2 * cfg.get("moe_intermediate_size", 3072)) // 2  # gate+up
+            warmup_fused_swiglu_compilation(
+                1, K_packed, N_packed_l1, dev,
+                swiglu_limit=cfg.get("swiglu_limit", 10.0),
+            )
        # Fix activation global scales — _ensure_initialized sets gsa from l1_gs (which is 1.0)
        # FIX: Same runtime gsa for SharedExpert
        se._use_runtime_gsa = True