From a2522839d87d2b81b57458dfdbbcb27afb8191ae Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Thu, 5 Feb 2026 18:29:54 +0800 Subject: [PATCH] [Bugfix] Fix Kimi-K2.5 NVFP4 checkpoints weight loading (#33876) Signed-off-by: Isotr0py --- vllm/model_executor/models/deepseek_v2.py | 2 +- vllm/model_executor/models/kimi_k25.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index f8907ed86..464518a3d 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -1485,7 +1485,7 @@ class DeepseekV2ForCausalLM( param, "weight_loader", default_weight_loader ) weight_loader(param, loaded_weight) - if not is_fusion_moe_shared_experts_layer: + if name is not None and not is_fusion_moe_shared_experts_layer: loaded_params.add(name) return loaded_params diff --git a/vllm/model_executor/models/kimi_k25.py b/vllm/model_executor/models/kimi_k25.py index 191aed8e5..cb07cfe98 100644 --- a/vllm/model_executor/models/kimi_k25.py +++ b/vllm/model_executor/models/kimi_k25.py @@ -24,7 +24,11 @@ from transformers.processing_utils import ProcessorMixin from vllm.config import VllmConfig from vllm.config.multimodal import BaseDummyOptions from vllm.logger import init_logger -from vllm.model_executor.models.interfaces import SupportsMultiModal, SupportsPP +from vllm.model_executor.models.interfaces import ( + SupportsMultiModal, + SupportsPP, + SupportsQuant, +) from vllm.model_executor.models.kimi_k25_vit import ( KimiK25MultiModalProjector, MoonViT3dPretrainedModel, @@ -302,7 +306,9 @@ class KimiK25MultiModalProcessor(BaseMultiModalProcessor[KimiK25ProcessingInfo]) info=KimiK25ProcessingInfo, dummy_inputs=KimiK25DummyInputsBuilder, ) -class KimiK25ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): +class KimiK25ForConditionalGeneration( + nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant +): """Kimi-K2.5 model for conditional generation. Supports both image and video-chunk modalities. @@ -312,8 +318,12 @@ class KimiK25ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) supports_encoder_tp_data = True - weights_mapper = WeightsMapper( + hf_to_vllm_mapper = WeightsMapper( orig_to_new_prefix={ + # For legacy NVFP4 checkpoint compatibility: + # see https://github.com/vllm-project/vllm/pull/33346#issuecomment-3851475033 + "language_model.layers.": "language_model.model.layers.", + # mm projector "mm_projector.proj.0": "mm_projector.linear_1", "mm_projector.proj.2": "mm_projector.linear_2", } @@ -465,4 +475,4 @@ class KimiK25ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loader = AutoWeightsLoader(self) - return loader.load_weights(weights, mapper=self.weights_mapper) + return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)