diff --git a/single_shot_inference.py b/single_shot_inference.py index d0c936e5..82afad7f 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -1042,6 +1042,15 @@ def main(): se.set_fused_swiglu(True) # EAGERLY process shared expert weights se._ensure_initialized() + # P1: Eagerly warmup fused SwiGLU compilation for SE (1-group) + if se._fused_swiglu: + from dsv4.ops.gemm_runner import warmup_fused_swiglu_compilation + K_packed = H // 2 + N_packed_l1 = (2 * cfg.get("moe_intermediate_size", 3072)) // 2 # gate+up + warmup_fused_swiglu_compilation( + 1, K_packed, N_packed_l1, dev, + swiglu_limit=cfg.get("swiglu_limit", 10.0), + ) # Fix activation global scales — _ensure_initialized sets gsa from l1_gs (which is 1.0) # FIX: Same runtime gsa for SharedExpert se._use_runtime_gsa = True