[Feature] use --eplb_config to set eplb param (#20562)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
rongfu.leng
2025-08-21 05:07:28 +08:00
committed by GitHub
parent 4e51fa8cba
commit 4fbda0b20c
9 changed files with 149 additions and 52 deletions

View File

@@ -1435,7 +1435,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
model,
is_dummy,
is_profile,
-            log_stats=self.parallel_config.eplb_log_balancedness,
+            log_stats=self.parallel_config.eplb_config.log_balancedness,
)
def get_dp_padding(self,
@@ -1977,7 +1977,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
global_expert_load, old_global_expert_indices = (
EplbState.recv_state())
num_logical_experts = global_expert_load.shape[1]
-        self.parallel_config.num_redundant_experts = (
+        self.parallel_config.eplb_config.num_redundant_experts = (
num_local_physical_experts * new_ep_size - num_logical_experts)
assert old_global_expert_indices.shape[
1] % num_local_physical_experts == 0

View File

@@ -515,7 +515,7 @@ class Worker(WorkerBase):
assert self.model_runner.eplb_state is not None
new_physical_experts = \
self.model_runner.eplb_state.physical_to_logical_map.shape[1]
-            parallel_config.num_redundant_experts = (
+            parallel_config.eplb_config.num_redundant_experts = (
new_physical_experts -
self.model_runner.eplb_state.logical_replica_count.shape[1])
global_expert_load = None
@@ -531,7 +531,7 @@ class Worker(WorkerBase):
assert self.model_runner.eplb_state is not None
global_expert_load = self.model_runner.eplb_state.rearrange(
self.model_runner.model, execute_shuffle=False)
-            parallel_config.num_redundant_experts = (
+            parallel_config.eplb_config.num_redundant_experts = (
new_physical_experts - global_expert_load.shape[1])
prepare_communication_buffer_for_model(self.model_runner.model)
self.model_runner.model.update_physical_experts_metadata(