From 9f430c94bd0b5cf5a697ddeefd4507ae078bb0ed Mon Sep 17 00:00:00 2001 From: Vadim Gimpelson <156319763+vadiklyutiy@users.noreply.github.com> Date: Tue, 13 Jan 2026 00:42:06 +0400 Subject: [PATCH] [BUGFIX] Add missing remapping of the names of fp8 kv-scale (#32199) Signed-off-by: Vadim Gimpelson --- vllm/model_executor/models/qwen3_next.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index c3e45de70..0f73a7746 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -64,6 +64,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, + maybe_remap_kv_scale_name, sharded_weight_loader, ) from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP @@ -1065,6 +1066,12 @@ class Qwen3NextModel(nn.Module): if name.startswith("mtp."): continue + # Remapping the name of FP8 kv-scale. + if name.endswith("scale"): + name = maybe_remap_kv_scale_name(name, params_dict) + if name is None: + continue + for param_name, weight_name, shard_id in stacked_params_mapping: if weight_name not in name: continue