[Bugfix] Qwen3.5 kv-scale weight remapping (#34719)
Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
This commit is contained in:
@@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 )
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader,
+    maybe_remap_kv_scale_name,
 )
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.sequence import IntermediateTensors
@@ -397,6 +398,12 @@ class Qwen3_5Model(Qwen3NextModel):
             if name.startswith("mtp."):
                 continue

+            # Remapping the name of FP8 kv-scale.
+            if name.endswith("scale"):
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if "experts.gate_up_proj" in name or "experts.down_proj" in name:
                     is_fused_expert = True
||||
Reference in New Issue
Block a user