feat: P1 — add eager warmup_fused_swiglu_compilation for SharedExpert (1-group)

This commit is contained in:
2026-06-02 08:25:52 +00:00
parent 5ed4c86137
commit 553275d810

View File

@@ -1042,6 +1042,15 @@ def main():
se.set_fused_swiglu(True)
# EAGERLY process shared expert weights
se._ensure_initialized()
# P1: Eagerly warmup fused SwiGLU compilation for SE (1-group)
if se._fused_swiglu:
from dsv4.ops.gemm_runner import warmup_fused_swiglu_compilation
K_packed = H // 2
N_packed_l1 = (2 * cfg.get("moe_intermediate_size", 3072)) // 2 # gate+up
warmup_fused_swiglu_compilation(
1, K_packed, N_packed_l1, dev,
swiglu_limit=cfg.get("swiglu_limit", 10.0),
)
# Fix activation global scales: _ensure_initialized sets gsa from l1_gs (which is 1.0),
# so enable the runtime gsa for SharedExpert as well.
se._use_runtime_gsa = True