[Feature] use --eplb_config to set eplb param (#20562)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
rongfu.leng
2025-08-21 05:07:28 +08:00
committed by GitHub
parent 4e51fa8cba
commit 4fbda0b20c
9 changed files with 149 additions and 52 deletions

View File

@@ -121,11 +121,11 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         # Load balancing settings.
         vllm_config = get_current_vllm_config()
-        parallel_config = vllm_config.parallel_config
+        eplb_config = vllm_config.parallel_config.eplb_config
         self.enable_eplb = enable_eplb
         self.n_logical_experts = self.n_routed_experts
-        self.n_redundant_experts = parallel_config.num_redundant_experts
+        self.n_redundant_experts = eplb_config.num_redundant_experts
         self.n_physical_experts = (self.n_logical_experts +
                                    self.n_redundant_experts)
         self.n_local_physical_experts = self.n_physical_experts // self.ep_size
@@ -363,7 +363,8 @@ class Qwen3MoeModel(nn.Module):
         quant_config = vllm_config.quant_config
         parallel_config = vllm_config.parallel_config
         enable_eplb = parallel_config.enable_eplb
-        self.num_redundant_experts = parallel_config.num_redundant_experts
+        eplb_config = parallel_config.eplb_config
+        self.num_redundant_experts = eplb_config.num_redundant_experts
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size