[Misc][Kernel]: Add GPTQAllSpark Quantization (#12931)

2025-03-01 14:30:59 +08:00
parent 6a84164add
commit 6a92ff93e1
12 changed files with 2005 additions and 4 deletions
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -215,8 +215,6 @@ def test_compressed_tensors_wNa16(vllm_runner, wNa16_args):
            assert qkv_proj.scheme.group_size == (-1
                                                  if group is None else group)

-            assert qkv_proj.weight_packed.dtype is torch.int32
-            assert qkv_proj.weight_scale.dtype is torch.float16
            assert qkv_proj.scheme.pack_factor == pack_factor

        llm.apply_model(check_model)