[Bugfix] Qwen3.5 kv-scale weight remapping (#34719)

Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
This commit is contained in:
Linda
2026-02-19 13:13:37 +01:00
committed by GitHub
parent 23210a911e
commit 6fff24f30f

View File

@@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
)
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
)
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.sequence import IntermediateTensors
@@ -397,6 +398,12 @@ class Qwen3_5Model(Qwen3NextModel):
if name.startswith("mtp."):
continue
# Remap FP8 kv-scale weight names via maybe_remap_kv_scale_name; skip the
# weight when the helper returns None.
if name.endswith("scale"):
name = maybe_remap_kv_scale_name(name, params_dict)
if name is None:
continue
for param_name, weight_name, shard_id in stacked_params_mapping:
if "experts.gate_up_proj" in name or "experts.down_proj" in name:
is_fused_expert = True