diff --git a/vllm/model_executor/models/qwen3_5.py b/vllm/model_executor/models/qwen3_5.py index 8c7626ffe..731bf3947 100644 --- a/vllm/model_executor/models/qwen3_5.py +++ b/vllm/model_executor/models/qwen3_5.py @@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, + maybe_remap_kv_scale_name, ) from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.sequence import IntermediateTensors @@ -397,6 +398,12 @@ class Qwen3_5Model(Qwen3NextModel): if name.startswith("mtp."): continue + # Remapping the name of FP8 kv-scale. + if name.endswith("scale"): + name = maybe_remap_kv_scale_name(name, params_dict) + if name is None: + continue + for param_name, weight_name, shard_id in stacked_params_mapping: if "experts.gate_up_proj" in name or "experts.down_proj" in name: is_fused_expert = True