[Misc] Fix Qwen2-MoE shared_expert_gate (#31339)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
@@ -111,7 +111,7 @@ class Qwen2MoeMLP(nn.Module):
         out, _ = self.down_proj(out)
 
         if self.expert_gate is not None:
-            out = F.sigmoid(self.expert_gate(x)) * out
+            out = F.sigmoid(self.expert_gate(x)[0]) * out
 
         return out
 
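Why the extra [0]: unlike torch.nn.Linear, vLLM's linear layers (including the ReplicatedLinear introduced in the next hunk) return an (output, output_bias) tuple from forward(), so the gate logits must be unpacked before the sigmoid. Below is a minimal sketch of that convention; TupleLinear is a hypothetical stand-in so the snippet runs without vLLM installed.

import torch
import torch.nn as nn
import torch.nn.functional as F


class TupleLinear(nn.Module):
    """Hypothetical stand-in for vLLM's (output, output_bias) convention."""

    def __init__(self, in_features: int, out_features: int) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features, in_features))

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, None]:
        # bias=False in the model, so the second element is None
        return F.linear(x, self.weight), None


expert_gate = TupleLinear(16, 1)
x = torch.randn(4, 16)
out = torch.randn(4, 16)

# Without [0], sigmoid would be applied to a tuple and raise a TypeError;
# indexing extracts the gate logits first, exactly as the fixed line does.
out = F.sigmoid(expert_gate(x)[0]) * out
print(out.shape)  # torch.Size([4, 16])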
@@ -140,7 +140,13 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
             prefix=f"{prefix}.gate",
         )
 
-        self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False)
+        self.shared_expert_gate = ReplicatedLinear(
+            config.hidden_size,
+            1,
+            bias=False,
+            quant_config=None,
+            prefix=f"{prefix}.shared_expert_gate",
+        )
 
         if config.shared_expert_intermediate_size > 0:
             self.shared_expert = Qwen2MoeMLP(
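For reference, a hedged sketch of constructing and calling the new gate on its own, assuming vLLM is installed and that ReplicatedLinear lives at its usual path vllm.model_executor.layers.linear; the prefix string and hidden_size value are made-up examples, and depending on the vLLM version, building layers outside a running engine may need extra setup.

import torch
from vllm.model_executor.layers.linear import ReplicatedLinear

hidden_size = 32  # stand-in for config.hidden_size

# Same arguments as the diff above; the prefix here is hypothetical.
shared_expert_gate = ReplicatedLinear(
    hidden_size,
    1,
    bias=False,
    quant_config=None,
    prefix="model.layers.0.mlp.shared_expert_gate",
)

x = torch.randn(2, hidden_size)
logits, _ = shared_expert_gate(x)  # (output, output_bias) tuple
print(logits.shape)  # torch.Size([2, 1])

Swapping torch.nn.Linear for ReplicatedLinear keeps the gate on vLLM's weight-loading path, so checkpoint weights map onto the prefix-derived parameter name; the cost is the tuple return that the first hunk unpacks with [0].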