Use Transformers v5 WeightRenaming for Transformers modeling backend (#31545)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-03-13 20:49:08 +00:00
committed by GitHub
parent d0b402974f
commit 0005d2a3c9
7 changed files with 162 additions and 89 deletions

View File

@@ -5,9 +5,10 @@ from collections.abc import Iterable
import pytest
import torch
import transformers
from transformers import AutoConfig, PreTrainedModel
from transformers import AutoConfig, AutoModel, PreTrainedModel
from vllm.config import ModelConfig
from vllm.model_executor.models.transformers.base import Base as TransformersBase
from vllm.model_executor.models.utils import WeightsMapper
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.transformers_utils.config import try_get_safetensors_metadata
@@ -23,6 +24,16 @@ def create_repo_dummy_weights(repo: str) -> Iterable[tuple[str, torch.Tensor]]:
return ((name, torch.empty(0)) for name in weight_names)
def create_dummy_base_model(repo: str, model_arch: str) -> PreTrainedModel:
    """
    Instantiate the Hugging Face *base* model for ``repo`` on the meta device.

    The config is fetched with ``AutoConfig.from_pretrained`` and the model is
    built via ``AutoModel.from_config`` inside ``torch.device("meta")``, so no
    real weight tensors are materialized — only the module structure and
    parameter names exist, which is all the weights-mapper test needs.

    NOTE(review): ``model_arch`` is unused in this function — presumably kept
    for signature parity with ``create_dummy_model``; confirm before removing.
    """
    config = AutoConfig.from_pretrained(repo)
    # Meta device: allocates tensor metadata only (shapes/dtypes), no storage.
    with torch.device("meta"):
        model = AutoModel.from_config(config)
    return model
def create_dummy_model(repo: str, model_arch: str) -> PreTrainedModel:
"""
Create weights from a dummy meta deserialized hf model with name conversion
@@ -79,6 +90,19 @@ def test_hf_model_weights_mapper(model_arch: str):
dtype=model_info.dtype,
)
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
if issubclass(model_cls, TransformersBase):
# Transformers backend models create their mapper during __init__
# by inspecting the HF model instance. We simulate this by calling
# _create_hf_to_vllm_mapper with a minimal proxy object.
model_cls = type(
"ProxyModelCls",
(),
{
"model": create_dummy_base_model(model_id, model_arch),
"_maybe_apply_model_mapping": lambda self: None,
},
)()
TransformersBase._create_hf_to_vllm_mapper(model_cls)
original_weights = create_repo_dummy_weights(model_id)
hf_dummy_model = create_dummy_model(model_id, model_arch)
@@ -102,9 +126,12 @@ def test_hf_model_weights_mapper(model_arch: str):
# after they are tied in the model, so the mapper will not be able to map them.
# We exclude them from the reference weight names for this test.
if isinstance(tied := getattr(hf_dummy_model, "_tied_weights_keys", None), dict):
mapped_tied_weights = mapper.apply((k, None) for k in tied)
tied_weight_names = set(map(lambda x: x[0], mapped_tied_weights))
ref_weight_names -= tied_weight_names
config = hf_dummy_model.config
key = "tie_word_embeddings"
if getattr(config.get_text_config(), key, False) or getattr(config, key, False):
mapped_tied_weights = mapper.apply((k, None) for k in tied)
tied_weight_names = set(map(lambda x: x[0], mapped_tied_weights))
ref_weight_names -= tied_weight_names
weights_missing = ref_weight_names - weight_names
weights_unmapped = weight_names - ref_weight_names