# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable import pytest import torch import transformers from transformers import AutoConfig, AutoModel, PreTrainedModel from vllm.config import ModelConfig from vllm.model_executor.models.transformers.base import Base as TransformersBase from vllm.model_executor.models.utils import WeightsMapper from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.transformers_utils.config import try_get_safetensors_metadata from ..registry import _MULTIMODAL_EXAMPLE_MODELS, HF_EXAMPLE_MODELS def create_repo_dummy_weights(repo: str) -> Iterable[tuple[str, torch.Tensor]]: """Create weights from safetensors checkpoint metadata""" metadata = try_get_safetensors_metadata(repo) weight_names = list(metadata.weight_map.keys()) with torch.device("meta"): return ((name, torch.empty(0)) for name in weight_names) def create_dummy_base_model(repo: str, model_arch: str) -> PreTrainedModel: """ Create weights from a dummy meta deserialized hf base model with name conversion """ config = AutoConfig.from_pretrained(repo) with torch.device("meta"): model = AutoModel.from_config(config) return model def create_dummy_model(repo: str, model_arch: str) -> PreTrainedModel: """ Create weights from a dummy meta deserialized hf model with name conversion """ model_cls: PreTrainedModel = getattr(transformers, model_arch) config = AutoConfig.from_pretrained(repo) with torch.device("meta"): model = model_cls._from_config(config) return model def model_architectures_for_test() -> list[str]: arch_to_test = list[str]() for model_arch, info in _MULTIMODAL_EXAMPLE_MODELS.items(): if not info.trust_remote_code and hasattr(transformers, model_arch): model_cls: PreTrainedModel = getattr(transformers, model_arch) if getattr(model_cls, "_checkpoint_conversion_mapping", None): arch_to_test.append(model_arch) return arch_to_test @pytest.mark.core_model 
@pytest.mark.parametrize("model_arch", model_architectures_for_test())
def test_hf_model_weights_mapper(model_arch: str):
    """Check that vLLM's weights mapper agrees with HF's checkpoint renaming.

    The set of checkpoint weight names, pushed through the vLLM mapper,
    must equal the set of parameter names of the HF model class (which
    applies ``_checkpoint_conversion_mapping`` on construction), also
    pushed through the same mapper.
    """
    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
    model_info.check_available_online(on_fail="skip")
    model_info.check_transformers_version(on_fail="skip")

    # Mistral-format checkpoints need the HF tokenizer unless the example
    # is explicitly configured for mistral tokenizer mode.
    uses_mistral_format = model_arch in (
        "Mistral3ForConditionalGeneration",
        "PixtralForConditionalGeneration",
        "VoxtralForConditionalGeneration",
    )
    tokenizer_mode = (
        "hf"
        if uses_mistral_format and model_info.tokenizer_mode != "mistral"
        else model_info.tokenizer_mode
    )

    model_id = model_info.default
    model_config = ModelConfig(
        model_id,
        tokenizer=model_info.tokenizer or model_id,
        tokenizer_mode=tokenizer_mode,
        config_format="hf",
        revision=model_info.revision,
        trust_remote_code=model_info.trust_remote_code,
        hf_overrides=model_info.hf_overrides,
        skip_tokenizer_init=model_info.require_embed_inputs,
        enable_prompt_embeds=model_info.require_embed_inputs,
        enable_mm_embeds=model_info.require_embed_inputs,
        enforce_eager=model_info.enforce_eager,
        dtype=model_info.dtype,
    )
    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)

    if issubclass(model_cls, TransformersBase):
        # Transformers backend models create their mapper during __init__
        # by inspecting the HF model instance. We simulate this by calling
        # _create_hf_to_vllm_mapper with a minimal proxy object.
        proxy_namespace = {
            "model": create_dummy_base_model(model_id, model_arch),
            "_maybe_apply_model_mapping": lambda self: None,
        }
        model_cls = type("ProxyModelCls", (), proxy_namespace)()
        TransformersBase._create_hf_to_vllm_mapper(model_cls)

    checkpoint_weights = create_repo_dummy_weights(model_id)
    hf_dummy_model = create_dummy_model(model_id, model_arch)

    mapper: WeightsMapper = model_cls.hf_to_vllm_mapper
    # Map three name sources through the same mapper: raw checkpoint names,
    # HF-converted parameter names, and HF buffer names.
    ref_weight_names = {name for name, _ in mapper.apply(checkpoint_weights)}
    weight_names = {
        name for name, _ in mapper.apply(hf_dummy_model.named_parameters())
    }
    buffer_names = {
        name for name, _ in mapper.apply(hf_dummy_model.named_buffers())
    }

    # Some checkpoints may have buffers, we ignore them for this test
    ref_weight_names -= buffer_names

    # Some checkpoints include tied weights (e.g. lm_head tied to embed_tokens)
    # in the safetensors file. In Transformers v5, named_parameters() will not
    # include them after they are tied in the model, so the mapper will not be
    # able to map them. We exclude them from the reference weight names.
    tied = getattr(hf_dummy_model, "_tied_weights_keys", None)
    if isinstance(tied, dict):
        config = hf_dummy_model.config
        key = "tie_word_embeddings"
        tying_enabled = getattr(config.get_text_config(), key, False) or getattr(
            config, key, False
        )
        if tying_enabled:
            tied_names = {name for name, _ in mapper.apply((k, None) for k in tied)}
            ref_weight_names -= tied_names

    missing = ref_weight_names - weight_names
    unmapped = weight_names - ref_weight_names
    assert not missing and not unmapped, (
        f"Following weights are not mapped correctly: {unmapped}, "
        f"Missing expected weights: {missing}."
    )