[CI] Revert PRs 34818 and 33600 (#34979)
@@ -457,9 +457,6 @@ def dummy_hf_overrides(
     # Kimi uses `num_expert_group` instead of `n_group`.
     if n_group is None:
         n_group = getattr(text_config, "num_expert_group", None)
-    # InternS1Pro uses `router_n_groups` instead of `n_group`.
-    if n_group is None:
-        n_group = getattr(text_config, "router_n_groups", None)
     num_experts = n_group * 2 if n_group is not None else 2

     # we use three layers for Gemma-3n to check
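Read in isolation, the fallback chain that survives this hunk reduces to the minimal sketch below. The initial `n_group` lookup is assumed from context rather than shown in the hunk, and the config object here is a hypothetical stand-in for the Hugging Face text config, not vLLM's real test fixture:

from types import SimpleNamespace

# Hypothetical stand-in for a Hugging Face text config (assumption: the
# real object exposes these attributes the same way via getattr).
text_config = SimpleNamespace(num_expert_group=4)

n_group = getattr(text_config, "n_group", None)  # assumed initial lookup
# Kimi uses `num_expert_group` instead of `n_group`.
if n_group is None:
    n_group = getattr(text_config, "num_expert_group", None)

# Two experts per group when grouping is configured, otherwise two total.
num_experts = n_group * 2 if n_group is not None else 2
print(num_experts)  # -> 8 for this dummy config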
@@ -489,14 +486,12 @@ def dummy_hf_overrides(
     # Only set MoE related config when the model has MoE layers.
     # Otherwise all models detected as MoE by _get_transformers_backend_cls.
     if model_arch_config.num_experts > 0:
-        orig_topk = getattr(text_config, "num_experts_per_tok", 2)
-        topk = min(orig_topk, 2)
         update_dict.update(
             {
                 "num_experts": num_experts,
-                "num_experts_per_tok": topk,
+                "num_experts_per_tok": 2,
                 # Kimi uses `num_experts_per_token`.
-                "num_experts_per_token": topk,
+                "num_experts_per_token": 2,
                 "num_local_experts": num_experts,
                 # Otherwise there will not be any expert layers
                 "first_k_dense_replace": 0,
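Applied, the second hunk pins the top-k values back to 2 instead of deriving them from the model's own `num_experts_per_tok`. A self-contained sketch of the resulting override block, reconstructed from the context lines above (the surrounding names are stand-ins, not vLLM APIs):

from types import SimpleNamespace

# Stand-ins for the surrounding test state (assumptions for illustration).
model_arch_config = SimpleNamespace(num_experts=8)
num_experts = 8
update_dict = {}

# Only set MoE related config when the model has MoE layers.
if model_arch_config.num_experts > 0:
    update_dict.update(
        {
            "num_experts": num_experts,
            "num_experts_per_tok": 2,  # fixed again, no longer min(orig_topk, 2)
            # Kimi uses `num_experts_per_token`.
            "num_experts_per_token": 2,
            "num_local_experts": num_experts,
            # Otherwise there will not be any expert layers
            "first_k_dense_replace": 0,
        }
    )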