Add GPTQ support (#916)

2023-12-15 19:04:22 +08:00
parent c06170cc8e
commit 0fbfc4b81b
35 changed files with 1782 additions and 82 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -142,7 +142,7 @@ class ModelConfig:
        self.tokenizer_mode = tokenizer_mode

    def _verify_quantization(self) -> None:
-        supported_quantization = ["awq", "squeezellm"]
+        supported_quantization = ["awq", "gptq", "squeezellm"]
        rocm_not_supported_quantization = ["awq"]
        if self.quantization is not None:
            self.quantization = self.quantization.lower()