[Feature] Expert Parallelism Load Balancer (EPLB) (#18343)

Signed-off-by: Bowen Wang <abmfy@icloud.com>
This commit is contained in:
Bowen Wang
2025-06-26 15:30:21 -07:00
committed by GitHub
parent 07b8fae219
commit e9fd658a73
24 changed files with 2446 additions and 54 deletions

View File

@@ -320,6 +320,11 @@ class EngineArgs:
data_parallel_rpc_port: Optional[int] = None
data_parallel_backend: str = ParallelConfig.data_parallel_backend
enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
enable_eplb: bool = ParallelConfig.enable_eplb
num_redundant_experts: int = ParallelConfig.num_redundant_experts
eplb_window_size: int = ParallelConfig.eplb_window_size
eplb_step_interval: int = ParallelConfig.eplb_step_interval
eplb_log_balancedness: bool = ParallelConfig.eplb_log_balancedness
max_parallel_loading_workers: Optional[
int] = ParallelConfig.max_parallel_loading_workers
block_size: Optional[BlockSize] = CacheConfig.block_size
@@ -666,6 +671,16 @@ class EngineArgs:
parallel_group.add_argument(
"--enable-expert-parallel",
**parallel_kwargs["enable_expert_parallel"])
parallel_group.add_argument("--enable-eplb",
**parallel_kwargs["enable_eplb"])
parallel_group.add_argument("--num-redundant-experts",
**parallel_kwargs["num_redundant_experts"])
parallel_group.add_argument("--eplb-window-size",
**parallel_kwargs["eplb_window_size"])
parallel_group.add_argument("--eplb-step-interval",
**parallel_kwargs["eplb_step_interval"])
parallel_group.add_argument("--eplb-log-balancedness",
**parallel_kwargs["eplb_log_balancedness"])
parallel_group.add_argument(
"--max-parallel-loading-workers",
**parallel_kwargs["max_parallel_loading_workers"])
@@ -1135,6 +1150,11 @@ class EngineArgs:
data_parallel_rpc_port=data_parallel_rpc_port,
data_parallel_backend=data_parallel_backend,
enable_expert_parallel=self.enable_expert_parallel,
enable_eplb=self.enable_eplb,
num_redundant_experts=self.num_redundant_experts,
eplb_window_size=self.eplb_window_size,
eplb_step_interval=self.eplb_step_interval,
eplb_log_balancedness=self.eplb_log_balancedness,
max_parallel_loading_workers=self.max_parallel_loading_workers,
disable_custom_all_reduce=self.disable_custom_all_reduce,
ray_workers_use_nsight=self.ray_workers_use_nsight,