[Kernel] Add Exllama as a backend for compressed-tensors (#9395)

2024-10-17 09:48:26 -04:00
parent dbfa8d31d5
commit e312e52b44
7 changed files with 173 additions and 16 deletions
--- a/vllm/scalar_type.py
+++ b/vllm/scalar_type.py
@@ -27,6 +27,8 @@ class scalar_types:
    float6_e3m2f = ScalarType.float_(3, 2, True, NanRepr.NONE.value)

    # "gptq" types
+    uint2b2 = ScalarType.uint(2, 2)
+    uint3b4 = ScalarType.uint(3, 4)
    uint4b8 = ScalarType.uint(4, 8)
    uint8b128 = ScalarType.uint(8, 128)