[CORE] Quantized lm-head Framework (#4442)

Co-authored-by: Robert Shaw <rshaw@neuralmagic.com> Co-authored-by: ZX <zx@lbx.dev>
2024-07-03 06:25:17 +08:00
parent 7c008c51a9
commit ee93f4f92a
48 changed files with 268 additions and 121 deletions
--- a/vllm/model_executor/layers/quantization/base_config.py
+++ b/vllm/model_executor/layers/quantization/base_config.py
@@ -87,6 +87,15 @@ class QuantizationConfig(ABC):
        raise ValueError(f"Cannot find any of {keys} in the model's "
                         "quantization config.")

+    @staticmethod
+    def get_from_keys_or(config: Dict[str, Any], keys: List[str],
+                         default: Any) -> Any:
+        """Get an optional value from the model's quantization config."""
+        try:
+            return QuantizationConfig.get_from_keys(config, keys)
+        except ValueError:
+            return default  # get_from_keys raised ValueError: use the fallback
+
    @abstractmethod
    def get_quant_method(
            self, layer: torch.nn.Module) -> Optional[QuantizeMethodBase]: