From 9f430c94bd0b5cf5a697ddeefd4507ae078bb0ed Mon Sep 17 00:00:00 2001
From: Vadim Gimpelson <156319763+vadiklyutiy@users.noreply.github.com>
Date: Tue, 13 Jan 2026 00:42:06 +0400
Subject: [PATCH] [BUGFIX] Add missed remaping of the names of fp8 kv-scale
 (#32199)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
---
 vllm/model_executor/models/qwen3_next.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py
index c3e45de70..0f73a7746 100644
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -64,6 +64,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 )
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader,
+    maybe_remap_kv_scale_name,
     sharded_weight_loader,
 )
 from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
@@ -1065,6 +1066,12 @@ class Qwen3NextModel(nn.Module):
             if name.startswith("mtp."):
                 continue
 
+            # Remapping the name of FP8 kv-scale.
+            if name.endswith("scale"):
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
                     continue