[Misc][Quark] Upstream Quark format to VLLM (#10765)

Signed-off-by: kewang-xlnx <kewang@xilinx.com> Signed-off-by: kewang2 <kewang2@amd.com> Co-authored-by: kewang2 <kewang2@amd.com> Co-authored-by: Michael Goin <michael@neuralmagic.com>
2025-01-16 00:05:15 +08:00
parent 5ecf3e0aaf
commit de0526f668
32 changed files with 1264 additions and 70 deletions
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -70,7 +70,7 @@ class RocmPlatform(Platform):

    supported_quantization: list[str] = [
        "awq", "gptq", "fp8", "compressed_tensors", "compressed-tensors",
-        "fbgemm_fp8", "gguf"
+        "fbgemm_fp8", "gguf", "quark"
    ]

    @classmethod