[BUGFIX] Add missing remapping of FP8 kv-scale names (#32199)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
2026-01-13 00:42:06 +04:00
parent f8bd8394e3
commit 9f430c94bd
1 changed files with 7 additions and 0 deletions
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -64,6 +64,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 )
 from vllm.model_executor.model_loader.weight_utils import (
    default_weight_loader,
+    maybe_remap_kv_scale_name,
    sharded_weight_loader,
 )
 from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
@@ -1065,6 +1066,12 @@ class Qwen3NextModel(nn.Module):
            if name.startswith("mtp."):
                continue

+            # Remapping the name of FP8 kv-scale.
+            if name.endswith("scale"):
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+
            for param_name, weight_name, shard_id in stacked_params_mapping:
                if weight_name not in name:
                    continue