[Kernel][Core] Add AWQ support to the Marlin kernel (#6612)

Author: Alexander Matveev
Date: 2024-07-21 19:41:42 -04:00
Committed by: GitHub
Parent: 25e778aa16
Commit: 396d92d5e0
21 changed files with 1594 additions and 276 deletions


@@ -251,7 +251,7 @@ class ModelConfig:
                     f"supported in ROCm.")
         if (self.quantization
                 not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin",
-                        "fbgemm_fp8", "compressed_tensors")):
+                        "awq_marlin", "fbgemm_fp8", "compressed_tensors")):
             logger.warning(
                 "%s quantization is not fully "
                 "optimized yet. The speed can be slower than "
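
The hunk above adds `"awq_marlin"` to the allow-list of quantization methods that skip the "not fully optimized" warning. A minimal standalone sketch of that check (hypothetical helper name and module layout; the real logic lives inside vLLM's `ModelConfig` verification, not a free function):

```python
import logging

logger = logging.getLogger(__name__)

# Methods whose kernels are considered fully optimized after this commit;
# the tuple mirrors the allow-list in the diff above.
_OPTIMIZED_QUANT_METHODS = ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin",
                            "awq_marlin", "fbgemm_fp8", "compressed_tensors")


def is_optimized_quantization(quantization: str) -> bool:
    """Return True if the method is in the optimized set; warn otherwise."""
    if quantization not in _OPTIMIZED_QUANT_METHODS:
        logger.warning(
            "%s quantization is not fully optimized yet. "
            "The speed can be slower than non-quantized models.",
            quantization)
        return False
    return True
```

With the commit applied, `is_optimized_quantization("awq_marlin")` no longer triggers the warning, while plain `"awq"` still would.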