diff --git a/tests/quantization/test_register_quantization_config.py b/tests/quantization/test_register_quantization_config.py index aeef4c2fd..8da048703 100644 --- a/tests/quantization/test_register_quantization_config.py +++ b/tests/quantization/test_register_quantization_config.py @@ -7,6 +7,7 @@ See https://github.com/vllm-project/vllm/issues/11926 for more details. Run `pytest tests/quantization/test_register_quantization_config.py`. """ +import logging from typing import Any import pytest @@ -100,17 +101,22 @@ class CustomQuantConfig(QuantizationConfig): return None -def test_register_quantization_config(): +def test_register_quantization_config(caplog_vllm): """Test register custom quantization config.""" # The quantization method `custom_quant` should be registered. assert get_quantization_config("custom_quant") == CustomQuantConfig # The quantization method `custom_quant` is already exists, - # should raise an error. - with pytest.raises(ValueError): + # should raise a warning when re-registering it. + with caplog_vllm.at_level(logging.WARNING): register_quantization_config("custom_quant")(CustomQuantConfig) + assert any( + "The quantization method 'custom_quant' already exists" in message + for message in caplog_vllm.messages + ), "Expected a warning when re-registering custom_quant" + @pytest.mark.parametrize( argnames="model", diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index b92fb8d26..bb42b10f8 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -3,8 +3,11 @@ from typing import Literal, get_args +from vllm.logger import init_logger from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +logger = init_logger(__name__) + QuantizationMethods = Literal[ "awq", "deepspeedfp", @@ -70,15 +73,20 @@ def register_quantization_config(quantization: str): def _wrapper(quant_config_cls): if quantization in QUANTIZATION_METHODS: - raise ValueError( - f"The quantization method `{quantization}` is already exists." + logger.warning( + "The quantization method '%s' already exists and will be " + "overwritten by the quantization config %s.", + quantization, + quant_config_cls, ) + else: + QUANTIZATION_METHODS.append(quantization) + if not issubclass(quant_config_cls, QuantizationConfig): raise ValueError( "The quantization config must be a subclass of `QuantizationConfig`." ) _CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = quant_config_cls - QUANTIZATION_METHODS.append(quantization) return quant_config_cls return _wrapper