[Quant] [Bugfix] Fix quantization config matching with hf_to_vllm_mapper (#20046)

This commit is contained in:
Kyle Sayers
2025-07-01 06:20:34 -04:00
committed by GitHub
parent c05596f1a3
commit 9025a9a705
17 changed files with 107 additions and 29 deletions

View File

@@ -10,6 +10,7 @@ from torch import nn
# Import these names only while static type checking.
# NOTE(review): likely done to avoid runtime import cost or an import
# cycle with the quantization/models packages — confirm against full file.
if TYPE_CHECKING:
from vllm.model_executor.layers.quantization import QuantizationMethods
from vllm.model_executor.models.utils import WeightsMapper
else:
# At runtime the alias degrades to plain str so annotations still resolve.
QuantizationMethods = str
@@ -149,3 +150,15 @@ class QuantizationConfig(ABC):
def get_cache_scale(self, name: str) -> Optional[str]:
    """Look up the cache-scale parameter name for a checkpoint weight.

    Base-class default: no weight has an associated cache scale, so the
    lookup always yields ``None``. Subclasses override to map *name* to
    the parameter that stores its KV-cache scale.

    :param name: checkpoint weight name to resolve
    :return: matching cache-scale parameter name, or ``None``
    """
    return None
def apply_vllm_mapper(  # noqa: B027
        self, hf_to_vllm_mapper: "WeightsMapper"):
    """Rewrite module names stored in this quantization config so they
    refer to the vLLM model structure instead of the HF layout.

    :param hf_to_vllm_mapper: maps from hf model structure (the assumed
        structure of the qconfig) to vllm model structure

    The base implementation is an intentional no-op (hence the B027
    suppression); subclasses that keep per-module settings override it.
    """
    # TODO (@kylesayrs): add implementations for all subclasses