[Misc] Fix Qwen2-MoE shared_expert_gate (#31339)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-12-26 13:10:39 +08:00
committed by GitHub
parent c79dbfa9ad
commit c3666f56fd
2 changed files with 8 additions and 3 deletions

View File

@@ -111,7 +111,7 @@ class Qwen2MoeMLP(nn.Module):
         out, _ = self.down_proj(out)
         if self.expert_gate is not None:
-            out = F.sigmoid(self.expert_gate(x)) * out
+            out = F.sigmoid(self.expert_gate(x)[0]) * out
         return out
@@ -140,7 +140,13 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
             prefix=f"{prefix}.gate",
         )
-        self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False)
+        self.shared_expert_gate = ReplicatedLinear(
+            config.hidden_size,
+            1,
+            bias=False,
+            quant_config=None,
+            prefix=f"{prefix}.shared_expert_gate",
+        )
         if config.shared_expert_intermediate_size > 0:
             self.shared_expert = Qwen2MoeMLP(