[BUGFIX] Add missing remapping of the names of fp8 kv-scale (#32199)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
This commit is contained in:
Vadim Gimpelson
2026-01-13 00:42:06 +04:00
committed by GitHub
parent f8bd8394e3
commit 9f430c94bd

View File

@@ -64,6 +64,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
)
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
sharded_weight_loader,
)
from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
@@ -1065,6 +1066,12 @@ class Qwen3NextModel(nn.Module):
if name.startswith("mtp."):
continue
# Remapping the name of FP8 kv-scale.
if name.endswith("scale"):
name = maybe_remap_kv_scale_name(name, params_dict)
if name is None:
continue
for param_name, weight_name, shard_id in stacked_params_mapping:
if weight_name not in name:
continue