diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py index 5b8aaa299..de4f19aff 100644 --- a/tests/models/quantization/test_bitsandbytes.py +++ b/tests/models/quantization/test_bitsandbytes.py @@ -6,7 +6,9 @@ Run `pytest tests/quantization/test_bitsandbytes.py`. """ import pytest +from packaging.version import Version from transformers import BitsAndBytesConfig +from transformers import __version__ as TRANSFORMERS_VERSION from tests.quantization.utils import is_quant_method_supported from vllm.platforms import current_platform @@ -138,6 +140,12 @@ def test_load_pp_4bit_bnb_model(model_name, description) -> None: compare_two_settings(model_name, common_args, pp_args) +@pytest.mark.skipif( + Version(TRANSFORMERS_VERSION) >= Version("5.0.0"), + reason="Need to add support for quantizing MoE experts with bnb" + " in transformers v5. See" + " https://github.com/bitsandbytes-foundation/bitsandbytes/issues/1849", +) @pytest.mark.skipif( not is_quant_method_supported("bitsandbytes"), reason="bitsandbytes is not supported on this GPU type.",