[Feature] Expert Parallelism Load Balancer (EPLB) (#18343)
Signed-off-by: Bowen Wang <abmfy@icloud.com>
@@ -31,12 +31,20 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
         text_config = hf_config.get_text_config()
 
+        # Ensure at least 2 experts per group,
+        # since `grouped_topk` assumes top-2
+        num_experts = getattr(text_config, 'n_group', 1) * 2
+
         text_config.update({
             "num_layers": 1,
             "num_hidden_layers": 1,
-            "num_experts": 2,
+            "num_experts": num_experts,
             "num_experts_per_tok": 2,
-            "num_local_experts": 2,
+            "num_local_experts": num_experts,
+            # Otherwise there will not be any expert layers
+            "first_k_dense_replace": 0,
+            # To avoid OOM on DeepSeek-V3
+            "n_routed_experts": num_experts,
         })
 
         if hasattr(hf_config, "vision_config"):
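Side note (not part of the commit): the `getattr(text_config, 'n_group', 1) * 2` expression in the hunk above scales the test's expert count so that configs with grouped expert routing, which expose the number of routing groups as `n_group` (DeepSeek-style configs), still end up with at least two experts in every group, matching what `grouped_topk` expects. A minimal sketch of that computation, using a hypothetical stand-in config object and an assumed group count of 8:

# Illustrative sketch only; `_FakeTextConfig` and its values are hypothetical.
class _FakeTextConfig:
    # Grouped-routing configs carry an `n_group` attribute;
    # models without grouped routing simply do not define it.
    n_group = 8

text_config = _FakeTextConfig()

# Same expression as in the diff: two experts per group, falling back to
# two experts total when the config has no `n_group` attribute.
num_experts = getattr(text_config, 'n_group', 1) * 2
assert num_experts == 16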