2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-06-03 11:20:17 -07:00
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-02-02 14:58:18 -05:00
|
|
|
|
2024-04-03 21:02:43 -07:00
|
|
|
import os
|
2024-04-27 09:59:55 -07:00
|
|
|
import tempfile
|
2024-04-03 21:02:43 -07:00
|
|
|
|
|
|
|
|
import huggingface_hub.constants
|
|
|
|
|
import pytest
|
2024-04-27 09:59:55 -07:00
|
|
|
from huggingface_hub.utils import LocalEntryNotFoundError
|
2024-04-03 21:02:43 -07:00
|
|
|
|
2024-04-27 09:59:55 -07:00
|
|
|
from vllm.model_executor.model_loader.weight_utils import (
|
|
|
|
|
download_weights_from_hf,
|
|
|
|
|
enable_hf_transfer,
|
2026-03-04 18:45:38 +05:30
|
|
|
maybe_remap_kv_scale_name,
|
2024-04-27 09:59:55 -07:00
|
|
|
)
|
2024-04-03 21:02:43 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_hf_transfer_auto_activation():
    """enable_hf_transfer() must set HF_HUB_ENABLE_HF_TRANSFER to True exactly
    when the optional ``hf_transfer`` package is importable, and False otherwise.
    """
    if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
        # An explicit user setting overrides auto-detection, so the
        # assertion below would be meaningless in that environment.
        pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")

    enable_hf_transfer()

    # Probe whether the optional transfer-acceleration package is importable.
    try:
        import hf_transfer  # type: ignore # noqa

        transfer_available = True
    except ImportError:
        transfer_available = False

    assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == transfer_available
|
|
|
|
|
|
|
|
|
|
|
2024-04-27 09:59:55 -07:00
|
|
|
def test_download_weights_from_hf():
    """End-to-end check of download_weights_from_hf offline/online behavior.

    1. Offline with an empty cache: the call must raise
       LocalEntryNotFoundError.
    2. Online: the download must succeed and populate the cache.
    3. Offline again: the cached weights must be served without network
       access and the call must return a non-None path.
    """
    # huggingface_hub.constants.HF_HUB_OFFLINE is process-global state.
    # Remember it and restore it in a finally block so this test does not
    # leak HF_HUB_OFFLINE=True into other tests (previously it was left
    # set on both the success and the failure path).
    original_offline = huggingface_hub.constants.HF_HUB_OFFLINE
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # assert LocalEntryNotFoundError error is thrown
            # if offline is set and model is not cached
            huggingface_hub.constants.HF_HUB_OFFLINE = True
            with pytest.raises(LocalEntryNotFoundError):
                download_weights_from_hf(
                    "facebook/opt-125m",
                    allow_patterns=["*.safetensors", "*.bin"],
                    cache_dir=tmpdir,
                )

            # download the model
            huggingface_hub.constants.HF_HUB_OFFLINE = False
            download_weights_from_hf(
                "facebook/opt-125m",
                allow_patterns=["*.safetensors", "*.bin"],
                cache_dir=tmpdir,
            )

            # now it should work offline
            huggingface_hub.constants.HF_HUB_OFFLINE = True
            assert (
                download_weights_from_hf(
                    "facebook/opt-125m",
                    allow_patterns=["*.safetensors", "*.bin"],
                    cache_dir=tmpdir,
                )
                is not None
            )
    finally:
        huggingface_hub.constants.HF_HUB_OFFLINE = original_offline
|
2024-04-27 09:59:55 -07:00
|
|
|
|
|
|
|
|
|
2026-03-04 18:45:38 +05:30
|
|
|
class TestMaybeRemapKvScaleName:
    """Tests for maybe_remap_kv_scale_name covering all checkpoint formats."""

    # Target namespace the remapper is expected to resolve names into.
    PARAMS_DICT = {
        "model.layers.0.self_attn.attn.k_scale": None,
        "model.layers.0.self_attn.attn.v_scale": None,
        "model.layers.0.self_attn.attn.q_scale": None,
        "model.layers.0.self_attn.qkv_proj.weight": None,
    }

    def _remap(self, checkpoint_name):
        # Every test remaps a single name against PARAMS_DICT.
        return maybe_remap_kv_scale_name(checkpoint_name, self.PARAMS_DICT)

    def test_qkv_proj_k_scale(self):
        """Qwen3-MoE / llm-compressor format: qkv_proj.k_scale -> attn.k_scale

        Regression test for https://github.com/vllm-project/vllm/issues/25047"""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_qkv_proj_v_scale(self):
        """Qwen3-MoE / llm-compressor format: qkv_proj.v_scale -> attn.v_scale

        Regression test for https://github.com/vllm-project/vllm/issues/25047"""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_modelopt_k_proj_k_scale(self):
        """ModelOpt format: k_proj.k_scale -> attn.k_scale"""
        remapped = self._remap("model.layers.0.self_attn.k_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_modelopt_v_proj_v_scale(self):
        """ModelOpt format: v_proj.v_scale -> attn.v_scale"""
        remapped = self._remap("model.layers.0.self_attn.v_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_deprecated_kv_scale(self):
        """Old format: kv_scale -> attn.k_scale (deprecated)"""
        remapped = self._remap("model.layers.0.self_attn.kv_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_default_bare_k_scale(self):
        """Default format: .k_scale -> .attn.k_scale"""
        remapped = self._remap("model.layers.0.self_attn.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_non_scale_name_unchanged(self):
        """Non-scale names should be returned unchanged."""
        weight_name = "model.layers.0.self_attn.qkv_proj.weight"
        assert self._remap(weight_name) == weight_name

    def test_nvfp4_modelopt_k_proj_k_scale(self):
        """ModelOpt NVFP4 format (e.g. nvidia/Qwen3-30B-A3B-NVFP4):
        k_proj.k_scale -> attn.k_scale.

        Validates that NVFP4 checkpoints are not broken by this change."""
        remapped = self._remap("model.layers.0.self_attn.k_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_nvfp4_modelopt_v_proj_v_scale(self):
        """ModelOpt NVFP4 format (e.g. nvidia/Qwen3-30B-A3B-NVFP4):
        v_proj.v_scale -> attn.v_scale.

        Validates that NVFP4 checkpoints are not broken by this change."""
        remapped = self._remap("model.layers.0.self_attn.v_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_qwen3_vl_moe_qkv_proj_k_scale(self):
        """Qwen3-VL-MoE uses the same fused qkv_proj naming as Qwen3-MoE.

        Regression test for qwen3_vl_moe.py fix (same bug as #25047)."""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_qwen3_vl_moe_qkv_proj_v_scale(self):
        """Qwen3-VL-MoE uses the same fused qkv_proj naming as Qwen3-MoE.

        Regression test for qwen3_vl_moe.py fix (same bug as #25047)."""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_nvfp4_weight_scale_not_remapped(self):
        """NVFP4 weight_scale should not be touched by remap (not a kv scale)."""
        weight_scale = "model.layers.0.self_attn.k_proj.weight_scale"
        assert self._remap(weight_scale) == weight_scale

    def test_nvfp4_input_scale_not_remapped(self):
        """NVFP4 input_scale should not be touched by remap (not a kv scale)."""
        input_scale = "model.layers.0.self_attn.k_proj.input_scale"
        assert self._remap(input_scale) == input_scale

    def test_missing_target_returns_none(self):
        """If remapped name not in params_dict, return None."""
        assert (
            maybe_remap_kv_scale_name(
                "model.layers.0.self_attn.qkv_proj.k_scale", {}
            )
            is None
        )
|
|
|
|
|
|
|
|
|
|
|
2024-04-03 21:02:43 -07:00
|
|
|
if __name__ == "__main__":
    # Allow running this module directly as a plain script (outside pytest);
    # only the two function-style tests are invoked, in order.
    for test_fn in (
        test_hf_transfer_auto_activation,
        test_download_weights_from_hf,
    ):
        test_fn()
|