[Bugfix] Fix Qwen3NextForCausalLM packed_modules_mapping (#35413)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -412,6 +412,8 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase):
|
||||
prefix=f"{prefix}.in_proj_qkvz",
|
||||
)
|
||||
# ba_proj doesn't support blockwise fp8 quantization.
|
||||
# in_proj_ba is defined as MergedColumnParallelLinear for
|
||||
# compatibility with Qwen3_5.
|
||||
self.in_proj_ba = MergedColumnParallelLinear(
|
||||
input_size=self.hidden_size,
|
||||
output_sizes=[self.num_v_heads] * 2,
|
||||
@@ -1326,6 +1328,8 @@ class Qwen3NextForCausalLM(
|
||||
"v_proj",
|
||||
],
|
||||
"gate_up_proj": ["gate_proj", "up_proj"],
|
||||
"in_proj_qkvz": ["in_proj_qkvz"],
|
||||
"in_proj_ba": ["in_proj_ba"],
|
||||
}
|
||||
|
||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
|
||||
Reference in New Issue
Block a user