Add GPTQ Marlin 2:4 sparse structured support (#4790)

Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
Alexander Matveev
2024-05-16 12:56:15 -04:00
committed by GitHub
parent 9216b9cc38
commit 6979ade384
18 changed files with 2130 additions and 40 deletions

View File

@@ -66,6 +66,17 @@ class QuantizationConfig(ABC):
"""Create a config class from the model's quantization config."""
raise NotImplementedError
@classmethod
def override_quantization_method(
    cls,
    hf_quant_cfg,
    user_quant,
) -> Optional[str]:
    """Report whether this config wants to replace the user's method.

    This is a hook for checking whether this quantization method can
    serve a given checkpoint format, in which case it overrides the
    quantization method the user specified. The base implementation
    ignores both arguments and never requests an override.

    Subclasses should override this only in exceptional circumstances.

    Args:
        hf_quant_cfg: Presumably the quantization config stored with the
            checkpoint -- confirm against the caller.
        user_quant: Presumably the quantization method the user asked
            for, if any -- confirm against the caller.

    Returns:
        Always ``None`` here. A subclass would presumably return the
        name of the method to use instead -- confirm against the caller.
    """
    return None
@staticmethod
def get_from_keys(config: Dict[str, Any], keys: List[str]) -> Any:
"""Get a value from the model's quantization config."""