[Feature] use --eplb_config to set eplb param (#20562)
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -121,11 +121,11 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
|
||||
|
||||
# Load balancing settings.
|
||||
vllm_config = get_current_vllm_config()
|
||||
- parallel_config = vllm_config.parallel_config
+ eplb_config = vllm_config.parallel_config.eplb_config
|
||||
self.enable_eplb = enable_eplb
|
||||
|
||||
self.n_logical_experts = self.n_routed_experts
|
||||
- self.n_redundant_experts = parallel_config.num_redundant_experts
+ self.n_redundant_experts = eplb_config.num_redundant_experts
|
||||
self.n_physical_experts = (self.n_logical_experts +
|
||||
self.n_redundant_experts)
|
||||
self.n_local_physical_experts = self.n_physical_experts // self.ep_size
|
||||
@@ -363,7 +363,8 @@ class Qwen3MoeModel(nn.Module):
|
||||
quant_config = vllm_config.quant_config
|
||||
parallel_config = vllm_config.parallel_config
|
||||
enable_eplb = parallel_config.enable_eplb
|
||||
- self.num_redundant_experts = parallel_config.num_redundant_experts
+ eplb_config = parallel_config.eplb_config
+ self.num_redundant_experts = eplb_config.num_redundant_experts
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
Reference in New Issue
Block a user