[ Kernel ] Fp8 Channelwise Weight Support (#6487)

Robert Shaw
2024-07-17 23:18:13 -04:00
committed by GitHub
parent b5af8c223c
commit 18fecc3559
4 changed files with 76 additions and 35 deletions


@@ -238,7 +238,8 @@ class ModelConfig:
                 f"{self.quantization} quantization is currently not "
                 f"supported in ROCm.")
             if (self.quantization
-                    not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin")):
+                    not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin",
+                            "compressed_tensors")):
                 logger.warning(
                     "%s quantization is not fully "
                     "optimized yet. The speed can be slower than "
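The hunk above only whitelists "compressed_tensors" in the config check; the channelwise FP8 scheme named in the commit title stores one scale per output channel of the weight matrix rather than a single tensor-wide scale. A minimal NumPy sketch of that idea, assuming a simulated E4M3 range (max finite value 448.0) with no real float8 dtype or rounding, and with illustrative function names not taken from the codebase:

```python
import numpy as np

FP8_E4M3_MAX = 448.0  # largest finite value representable in FP8 E4M3


def quantize_channelwise(weight: np.ndarray):
    """Per-output-channel scaling: one scale per row of the weight matrix.

    This is a simulation only -- values are scaled and clipped to the E4M3
    range but not rounded to the actual FP8 grid.
    """
    max_abs = np.abs(weight).max(axis=1, keepdims=True)
    scale = max_abs / FP8_E4M3_MAX
    scale = np.where(scale == 0.0, 1.0, scale)  # avoid division by zero
    q = np.clip(weight / scale, -FP8_E4M3_MAX, FP8_E4M3_MAX)
    return q, scale


def dequantize(q: np.ndarray, scale: np.ndarray) -> np.ndarray:
    return q * scale


# Rows with very different magnitudes: a single tensor-wide scale would
# crush the small row, but per-channel scales preserve both.
w = np.array([[0.5, -2.0, 1.0],
              [100.0, -300.0, 50.0]], dtype=np.float32)
q, s = quantize_channelwise(w)
```

Because each row gets its own scale, the first row's small values keep their precision even though the second row spans a much larger range; that is the benefit channelwise scales offer over a single per-tensor scale.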