diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 777d1d7bf..c57265cc7 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -412,6 +412,8 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase): prefix=f"{prefix}.in_proj_qkvz", ) # ba_proj doesn't support blockwise fp8 quantization. + # in_proj_ba is defined as MergedColumnParallelLinear for + # compatibility with Qwen3_5. self.in_proj_ba = MergedColumnParallelLinear( input_size=self.hidden_size, output_sizes=[self.num_v_heads] * 2, @@ -1326,6 +1328,8 @@ class Qwen3NextForCausalLM( "v_proj", ], "gate_up_proj": ["gate_proj", "up_proj"], + "in_proj_qkvz": ["in_proj_qkvz"], + "in_proj_ba": ["in_proj_ba"], } def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):