feat: P1 — add eager warmup_fused_swiglu_compilation for SharedExpert (1-group)
This commit is contained in:
@@ -1042,6 +1042,15 @@ def main():
|
||||
se.set_fused_swiglu(True)
|
||||
# Eagerly process shared expert weights
|
||||
se._ensure_initialized()
|
||||
# P1: Eagerly warm up fused SwiGLU compilation for SE (1-group)
|
||||
if se._fused_swiglu:
|
||||
from dsv4.ops.gemm_runner import warmup_fused_swiglu_compilation
|
||||
K_packed = H // 2
|
||||
N_packed_l1 = (2 * cfg.get("moe_intermediate_size", 3072)) // 2 # gate+up
|
||||
warmup_fused_swiglu_compilation(
|
||||
1, K_packed, N_packed_l1, dev,
|
||||
swiglu_limit=cfg.get("swiglu_limit", 10.0),
|
||||
)
|
||||
# Fix activation global scales — _ensure_initialized sets gsa from l1_gs (which is 1.0)
|
||||
# FIX: use the runtime gsa for SharedExpert too, instead of the value taken from l1_gs
|
||||
se._use_runtime_gsa = True
|
||||
|
||||
Reference in New Issue
Block a user