[EPLB] Optimize EPLB with numpy (#29499)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
2026-01-07 21:21:35 +01:00
parent 0ada960a20
commit 6170d47d22
8 changed files with 732 additions and 266 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -69,6 +69,10 @@ class EPLBConfig:
    Log the balancedness each step of expert parallelism.
    This is turned off by default since it will cause communication overhead.
    """
+    log_balancedness_interval: int = 1
+    """
+    Interval for logging the balancedness.
+    """
    use_async: bool = False
    """
    Whether to use non-blocking EPLB.
@@ -77,6 +81,14 @@ class EPLBConfig:
    policy: EPLBPolicyOption = "default"
    """The policy type for expert parallel load balancing (EPLB)."""

+    @model_validator(mode="after")
+    def _validate_eplb_config(self) -> Self:  # raises ValueError on invalid combos
+        if self.use_async and self.policy != "default":  # async => default only
+            raise ValueError("Async EPLB is only supported with the default policy.")
+        if self.log_balancedness and self.log_balancedness_interval <= 0:
+            raise ValueError("log_balancedness_interval must be greater than 0.")
+        return self  # same instance, not modified here
+

@config
@dataclass