diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py
index 3822aae00..9fded8e6b 100644
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -145,7 +145,13 @@ class Qwen3NextSparseMoeBlock(nn.Module):
             prefix=f"{prefix}.gate",
         )
 
-        self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False)
+        self.shared_expert_gate = ReplicatedLinear(
+            config.hidden_size,
+            1,
+            bias=False,
+            quant_config=None,
+            prefix=f"{prefix}.shared_expert_gate",
+        )
 
         if config.shared_expert_intermediate_size > 0:
             self.shared_expert = Qwen3NextMLP(