[Feature] use --eplb_config to set eplb param (#20562)
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
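For context, this change groups the EPLB tuning knobs that previously lived as flat fields on the parallel config (for example eplb_log_balancedness and num_redundant_experts, both visible in the hunks below) under a single eplb_config object set via --eplb_config. The sketch below illustrates that grouping only; the class name EplbConfig, its defaults, and the dataclass layout are assumptions for illustration, not the PR's exact definitions.

from dataclasses import dataclass, field

@dataclass
class EplbConfig:
    # Field names taken from this diff; defaults are illustrative assumptions.
    num_redundant_experts: int = 0
    log_balancedness: bool = False

@dataclass
class ParallelConfig:
    # EPLB parameters are grouped under one nested config instead of
    # flat eplb_* attributes on the parallel config.
    eplb_config: EplbConfig = field(default_factory=EplbConfig)

cfg = ParallelConfig(
    eplb_config=EplbConfig(num_redundant_experts=2, log_balancedness=True))
print(cfg.eplb_config.log_balancedness)  # True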
@@ -1435,7 +1435,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 model,
                 is_dummy,
                 is_profile,
-                log_stats=self.parallel_config.eplb_log_balancedness,
+                log_stats=self.parallel_config.eplb_config.log_balancedness,
             )
 
     def get_dp_padding(self,
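The only behavioral change in this hunk is where the logging flag is read from: the nested eplb_config instead of a flat attribute. A hedged compatibility sketch for downstream code that has to read the flag across both layouts (the helper name is hypothetical, not part of the PR):

def eplb_log_balancedness(parallel_config) -> bool:
    # Hypothetical helper: prefer the new grouped config, fall back to the
    # old flat attribute when running against an older vLLM.
    eplb_cfg = getattr(parallel_config, "eplb_config", None)
    if eplb_cfg is not None:
        return eplb_cfg.log_balancedness
    return getattr(parallel_config, "eplb_log_balancedness", False)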
@@ -1977,7 +1977,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         global_expert_load, old_global_expert_indices = (
             EplbState.recv_state())
         num_logical_experts = global_expert_load.shape[1]
-        self.parallel_config.num_redundant_experts = (
+        self.parallel_config.eplb_config.num_redundant_experts = (
             num_local_physical_experts * new_ep_size - num_logical_experts)
         assert old_global_expert_indices.shape[
             1] % num_local_physical_experts == 0
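The assignment moved in this hunk derives the redundant-expert count from the physical and logical expert layout: total physical expert slots across the new expert-parallel size, minus the model's logical experts. A small worked example with assumed numbers (not taken from the PR):

# Illustrative values only, not from the PR.
num_logical_experts = 64          # experts defined by the model
num_local_physical_experts = 9    # physical expert slots per EP rank
new_ep_size = 8                   # expert-parallel world size after scaling

num_redundant_experts = (
    num_local_physical_experts * new_ep_size - num_logical_experts)
assert num_redundant_experts == 8  # 9 * 8 - 64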