2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-06-03 11:20:17 -07:00
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-02-02 14:58:18 -05:00
|
|
|
|
2024-04-03 21:02:43 -07:00
|
|
|
import os
|
2024-04-27 09:59:55 -07:00
|
|
|
import tempfile
|
2024-04-03 21:02:43 -07:00
|
|
|
|
|
|
|
|
import huggingface_hub.constants
|
|
|
|
|
import pytest
|
2024-04-27 09:59:55 -07:00
|
|
|
from huggingface_hub.utils import LocalEntryNotFoundError
|
2024-04-03 21:02:43 -07:00
|
|
|
|
2024-04-27 09:59:55 -07:00
|
|
|
from vllm.model_executor.model_loader.weight_utils import (
|
|
|
|
|
download_weights_from_hf,
|
|
|
|
|
enable_hf_transfer,
|
2026-03-04 18:45:38 +05:30
|
|
|
maybe_remap_kv_scale_name,
|
2024-04-27 09:59:55 -07:00
|
|
|
)
|
2024-04-03 21:02:43 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_hf_transfer_auto_activation():
    """enable_hf_transfer() must set HF_HUB_ENABLE_HF_TRANSFER to True exactly
    when the optional ``hf_transfer`` package is importable, and False otherwise.
    """
    if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
        # An explicit user setting overrides auto-detection, so the
        # assertion below would be meaningless in that environment.
        pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")

    enable_hf_transfer()

    # Probe whether the optional transfer-acceleration package is importable.
    try:
        import hf_transfer  # type: ignore # noqa

        transfer_available = True
    except ImportError:
        transfer_available = False

    assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == transfer_available
|
|
|
|
|
|
|
|
|
|
|
2024-04-27 09:59:55 -07:00
|
|
|
def test_download_weights_from_hf():
    """End-to-end check of download_weights_from_hf offline/online behavior.

    1. Offline with an empty cache: the call must raise
       LocalEntryNotFoundError.
    2. Online: the download must succeed and populate the cache.
    3. Offline again: the cached weights must be served without network
       access and the call must return a non-None path.
    """
    # huggingface_hub.constants.HF_HUB_OFFLINE is process-global state.
    # Remember it and restore it in a finally block so this test does not
    # leak HF_HUB_OFFLINE=True into other tests (previously it was left
    # set on both the success and the failure path).
    original_offline = huggingface_hub.constants.HF_HUB_OFFLINE
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # assert LocalEntryNotFoundError error is thrown
            # if offline is set and model is not cached
            huggingface_hub.constants.HF_HUB_OFFLINE = True
            with pytest.raises(LocalEntryNotFoundError):
                download_weights_from_hf(
                    "facebook/opt-125m",
                    allow_patterns=["*.safetensors", "*.bin"],
                    cache_dir=tmpdir,
                )

            # download the model
            huggingface_hub.constants.HF_HUB_OFFLINE = False
            download_weights_from_hf(
                "facebook/opt-125m",
                allow_patterns=["*.safetensors", "*.bin"],
                cache_dir=tmpdir,
            )

            # now it should work offline
            huggingface_hub.constants.HF_HUB_OFFLINE = True
            assert (
                download_weights_from_hf(
                    "facebook/opt-125m",
                    allow_patterns=["*.safetensors", "*.bin"],
                    cache_dir=tmpdir,
                )
                is not None
            )
    finally:
        huggingface_hub.constants.HF_HUB_OFFLINE = original_offline
|
2024-04-27 09:59:55 -07:00
|
|
|
|
|
|
|
|
|
2026-03-04 18:45:38 +05:30
|
|
|
class TestMaybeRemapKvScaleName:
    """Tests for maybe_remap_kv_scale_name covering all checkpoint formats."""

    # Target namespace the remapper is expected to resolve names into.
    PARAMS_DICT = {
        "model.layers.0.self_attn.attn.k_scale": None,
        "model.layers.0.self_attn.attn.v_scale": None,
        "model.layers.0.self_attn.attn.q_scale": None,
        "model.layers.0.self_attn.qkv_proj.weight": None,
    }

    def _remap(self, checkpoint_name):
        # Every test remaps a single name against PARAMS_DICT.
        return maybe_remap_kv_scale_name(checkpoint_name, self.PARAMS_DICT)

    def test_qkv_proj_k_scale(self):
        """Qwen3-MoE / llm-compressor format: qkv_proj.k_scale -> attn.k_scale

        Regression test for https://github.com/vllm-project/vllm/issues/25047"""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_qkv_proj_v_scale(self):
        """Qwen3-MoE / llm-compressor format: qkv_proj.v_scale -> attn.v_scale

        Regression test for https://github.com/vllm-project/vllm/issues/25047"""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_modelopt_k_proj_k_scale(self):
        """ModelOpt format: k_proj.k_scale -> attn.k_scale"""
        remapped = self._remap("model.layers.0.self_attn.k_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_modelopt_v_proj_v_scale(self):
        """ModelOpt format: v_proj.v_scale -> attn.v_scale"""
        remapped = self._remap("model.layers.0.self_attn.v_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_deprecated_kv_scale(self):
        """Old format: kv_scale -> attn.k_scale (deprecated)"""
        remapped = self._remap("model.layers.0.self_attn.kv_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_default_bare_k_scale(self):
        """Default format: .k_scale -> .attn.k_scale"""
        remapped = self._remap("model.layers.0.self_attn.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_non_scale_name_unchanged(self):
        """Non-scale names should be returned unchanged."""
        weight_name = "model.layers.0.self_attn.qkv_proj.weight"
        assert self._remap(weight_name) == weight_name

    def test_nvfp4_modelopt_k_proj_k_scale(self):
        """ModelOpt NVFP4 format (e.g. nvidia/Qwen3-30B-A3B-NVFP4):
        k_proj.k_scale -> attn.k_scale.

        Validates that NVFP4 checkpoints are not broken by this change."""
        remapped = self._remap("model.layers.0.self_attn.k_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_nvfp4_modelopt_v_proj_v_scale(self):
        """ModelOpt NVFP4 format (e.g. nvidia/Qwen3-30B-A3B-NVFP4):
        v_proj.v_scale -> attn.v_scale.

        Validates that NVFP4 checkpoints are not broken by this change."""
        remapped = self._remap("model.layers.0.self_attn.v_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_qwen3_vl_moe_qkv_proj_k_scale(self):
        """Qwen3-VL-MoE uses the same fused qkv_proj naming as Qwen3-MoE.

        Regression test for qwen3_vl_moe.py fix (same bug as #25047)."""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.k_scale")
        assert remapped == "model.layers.0.self_attn.attn.k_scale"

    def test_qwen3_vl_moe_qkv_proj_v_scale(self):
        """Qwen3-VL-MoE uses the same fused qkv_proj naming as Qwen3-MoE.

        Regression test for qwen3_vl_moe.py fix (same bug as #25047)."""
        remapped = self._remap("model.layers.0.self_attn.qkv_proj.v_scale")
        assert remapped == "model.layers.0.self_attn.attn.v_scale"

    def test_nvfp4_weight_scale_not_remapped(self):
        """NVFP4 weight_scale should not be touched by remap (not a kv scale)."""
        weight_scale = "model.layers.0.self_attn.k_proj.weight_scale"
        assert self._remap(weight_scale) == weight_scale

    def test_nvfp4_input_scale_not_remapped(self):
        """NVFP4 input_scale should not be touched by remap (not a kv scale)."""
        input_scale = "model.layers.0.self_attn.k_proj.input_scale"
        assert self._remap(input_scale) == input_scale

    def test_missing_target_returns_none(self):
        """If remapped name not in params_dict, return None."""
        assert (
            maybe_remap_kv_scale_name(
                "model.layers.0.self_attn.qkv_proj.k_scale", {}
            )
            is None
        )
|
|
|
|
|
|
|
|
|
|
|
2024-04-03 21:02:43 -07:00
|
|
|
if __name__ == "__main__":
    # Allow running this module directly as a plain script (outside pytest);
    # only the two function-style tests are invoked, in order.
    for test_fn in (
        test_hf_transfer_auto_activation,
        test_download_weights_from_hf,
    ):
        test_fn()
|