[EPLB] Optimize EPLB with numpy (#29499)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Ilya Markov
2026-01-07 21:21:35 +01:00
committed by GitHub
parent 0ada960a20
commit 6170d47d22
8 changed files with 732 additions and 266 deletions

View File

@@ -69,6 +69,10 @@ class EPLBConfig:
Log the balancedness each step of expert parallelism.
This is turned off by default since it will cause communication overhead.
"""
log_balancedness_interval: int = 1
"""
Interval for logging the balancedness.
"""
use_async: bool = False
"""
Whether to use non-blocking EPLB.
@@ -77,6 +81,14 @@ class EPLBConfig:
policy: EPLBPolicyOption = "default"
"""The policy type for expert parallel load balancing (EPLB)."""
@model_validator(mode="after")
def _validate_eplb_config(self) -> Self:
    """Check that the EPLB options on this config are mutually consistent.

    Registered as an ``after``-mode model validator.

    Returns:
        This config instance, unmodified.

    Raises:
        ValueError: If ``use_async`` is set together with a policy other
            than ``"default"``, or if ``log_balancedness`` is enabled
            while ``log_balancedness_interval`` is not strictly positive.
    """
    if self.use_async:
        # The policy is only inspected when async EPLB is requested.
        if self.policy != "default":
            message = "Async EPLB is only supported with the default policy."
            raise ValueError(message)

    if self.log_balancedness:
        # The interval is only checked when balancedness logging is on.
        if self.log_balancedness_interval <= 0:
            message = "log_balancedness_interval must be greater than 0."
            raise ValueError(message)

    return self
@config
@dataclass