workaround of AWQ for Turing GPUs (#1252)

2023-10-11 11:48:16 +09:00
parent 91fce82c6f
commit 8285736840
3 changed files with 73 additions and 5 deletions
--- a/vllm/model_executor/quantization_utils/awq.py
+++ b/vllm/model_executor/quantization_utils/awq.py
@@ -42,8 +42,8 @@ class AWQConfig(QuantizationConfig):

    @classmethod
    def get_min_capability(cls) -> int:
-        # The AWQ kernel only supports Ampere or newer GPUs.
-        return 80
+        # The AWQ kernel needs compute capability 7.5 (Turing) or newer.
+        return 75

    @classmethod
    def get_config_filenames(cls) -> List[str]: