[Feature] Use --eplb_config to set EPLB parameters (#20562)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
rongfu.leng authored on 2025-08-21 05:07:28 +08:00; committed by GitHub
parent 4e51fa8cba, commit 4fbda0b20c
9 changed files with 149 additions and 52 deletions
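
This commit replaces individual EPLB knobs on `ParallelConfig` (such as `eplb_log_balancedness` and `num_redundant_experts`, visible in the hunks below) with a single nested `eplb_config` object set via `--eplb_config`. A minimal sketch of the grouped config, assuming a plain dataclass; only the two fields touched in this diff come from the source, and the defaults are assumptions:

```python
from dataclasses import dataclass


# Hedged sketch of the grouped EPLB config this commit introduces. The two
# field names come from the hunks below; the defaults are assumptions.
@dataclass
class EPLBConfig:
    num_redundant_experts: int = 0  # physical expert slots beyond the logical experts
    log_balancedness: bool = False  # periodically log expert load balance


cfg = EPLBConfig(num_redundant_experts=2, log_balancedness=True)
```

On the command line, vLLM's dataclass-backed arguments are usually supplied as JSON, so invocation would plausibly look like `--eplb-config '{"num_redundant_experts": 2, "log_balancedness": true}'` (exact flag spelling and parsing behavior are assumptions here, not taken from this diff).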

@@ -1435,7 +1435,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 model,
                 is_dummy,
                 is_profile,
-                log_stats=self.parallel_config.eplb_log_balancedness,
+                log_stats=self.parallel_config.eplb_config.log_balancedness,
             )
 
     def get_dp_padding(self,
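
The hunk above moves the balancedness-logging switch from a flat `ParallelConfig` attribute to the nested config object. A runnable before/after sketch of the access pattern, using stand-in objects (the real classes live in vLLM's config module; only the attribute names are taken from the hunk):

```python
from types import SimpleNamespace

# Minimal stand-ins for ParallelConfig before and after this commit.
old_parallel_config = SimpleNamespace(eplb_log_balancedness=True)
new_parallel_config = SimpleNamespace(
    eplb_config=SimpleNamespace(log_balancedness=True))

# Same setting, now reached through the nested eplb_config field.
assert (old_parallel_config.eplb_log_balancedness
        == new_parallel_config.eplb_config.log_balancedness)
```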
@@ -1977,7 +1977,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         global_expert_load, old_global_expert_indices = (
             EplbState.recv_state())
         num_logical_experts = global_expert_load.shape[1]
-        self.parallel_config.num_redundant_experts = (
+        self.parallel_config.eplb_config.num_redundant_experts = (
             num_local_physical_experts * new_ep_size - num_logical_experts)
         assert old_global_expert_indices.shape[
             1] % num_local_physical_experts == 0
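
The second hunk recomputes the redundant-expert count after an expert-parallel reshard: the total number of physical expert slots (`num_local_physical_experts * new_ep_size`) minus the number of logical experts reported by the received load tensor. A worked sketch of that arithmetic with made-up numbers:

```python
# Illustrative numbers only; nothing here is taken from the commit.
num_local_physical_experts = 8  # expert slots hosted per EP rank
new_ep_size = 4                 # expert-parallel world size after resharding
num_logical_experts = 30        # distinct experts in the model

# 8 * 4 = 32 physical slots covering 30 logical experts -> 2 redundant copies.
num_redundant_experts = (
    num_local_physical_experts * new_ep_size - num_logical_experts)
assert num_redundant_experts == 2

# The assert in the hunk checks that the old global expert-index table
# divides evenly across ranks; mirrored here with a fake column count.
old_global_expert_indices_cols = 32
assert old_global_expert_indices_cols % num_local_physical_experts == 0
```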