From 1e5ad9b74f70f4690dff629598d414ee27116b85 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Fri, 27 Feb 2026 11:46:30 +0800 Subject: [PATCH] [Bugfix] Fix Qwen3NextForCausalLM packed_modules_mapping (#35413) Signed-off-by: Jee Jee Li --- vllm/model_executor/models/qwen3_next.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 777d1d7bf..c57265cc7 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -412,6 +412,8 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase): prefix=f"{prefix}.in_proj_qkvz", ) # ba_proj doesn't support blockwise fp8 quantization. + # in_proj_ba is defined as MergedColumnParallelLinear for + # compatibility with Qwen3_5. self.in_proj_ba = MergedColumnParallelLinear( input_size=self.hidden_size, output_sizes=[self.num_v_heads] * 2, @@ -1326,6 +1328,8 @@ class Qwen3NextForCausalLM( "v_proj", ], "gate_up_proj": ["gate_proj", "up_proj"], + "in_proj_qkvz": ["in_proj_qkvz"], + "in_proj_ba": ["in_proj_ba"], } def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):