[Feature] Expert Parallelism Load Balancer (EPLB) (#18343)

Signed-off-by: Bowen Wang <abmfy@icloud.com>

Author:  Bowen Wang (committed by GitHub)
Date:    2025-06-26 15:30:21 -07:00
Parent:  07b8fae219
Commit:  e9fd658a73
24 changed files with 2446 additions and 54 deletions


@@ -31,12 +31,20 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
     text_config = hf_config.get_text_config()
+    # Ensure at least 2 experts per group
+    # Since `grouped_topk` assumes top-2
+    num_experts = getattr(text_config, 'n_group', 1) * 2
     text_config.update({
         "num_layers": 1,
         "num_hidden_layers": 1,
-        "num_experts": 2,
+        "num_experts": num_experts,
         "num_experts_per_tok": 2,
-        "num_local_experts": 2,
+        "num_local_experts": num_experts,
+        # Otherwise there will not be any expert layers
+        "first_k_dense_replace": 0,
+        # To avoid OOM on DeepSeek-V3
+        "n_routed_experts": num_experts,
     })
     if hasattr(hf_config, "vision_config"):
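
Note on the change above: `grouped_topk` (DeepSeek-style routing) first selects expert groups and then takes the top-2 experts within the kept groups, so each of the `n_group` groups needs at least 2 experts; hence `num_experts = n_group * 2`. The sketch below illustrates that selection logic only; it is not vLLM's actual `grouped_topk` implementation, and the function name `grouped_topk_sketch`, the max-logit group scoring, and `topk_group=1` are assumptions made for the example.

    # Minimal sketch (not vLLM's implementation) of grouped top-k routing,
    # showing why the test needs at least 2 experts per group.
    import torch

    def grouped_topk_sketch(logits: torch.Tensor, n_group: int, topk: int = 2,
                            topk_group: int = 1) -> torch.Tensor:
        # logits: (num_tokens, num_experts); experts split evenly into groups.
        num_tokens, num_experts = logits.shape
        assert num_experts % n_group == 0
        # Score each group by its best expert logit (assumed scoring rule).
        group_scores = logits.view(num_tokens, n_group, -1).max(dim=-1).values
        # Keep the top `topk_group` groups; mask experts in all other groups.
        group_idx = group_scores.topk(topk_group, dim=-1).indices
        group_mask = torch.zeros_like(group_scores).scatter_(1, group_idx, 1.0)
        expert_mask = group_mask.unsqueeze(-1).expand(
            num_tokens, n_group, num_experts // n_group).reshape(num_tokens, -1)
        masked_logits = logits.masked_fill(expert_mask == 0, float("-inf"))
        # Top-2 selection is only well defined if each kept group has >= 2 experts.
        return masked_logits.topk(topk, dim=-1).indices

    logits = torch.randn(4, 8)  # 4 tokens, n_group=4 groups of 2 experts each
    print(grouped_topk_sketch(logits, n_group=4))

If each group held only one expert, the final top-2 selection would run out of finite scores, which is exactly what the test's `num_experts` computation guards against.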