[Misc][Kernel]: Add GPTQAllSpark Quantization (#12931)

This commit is contained in:
YajieWang
2025-03-01 14:30:59 +08:00
committed by GitHub
parent 6a84164add
commit 6a92ff93e1
12 changed files with 2005 additions and 4 deletions

View File

@@ -215,8 +215,6 @@ def test_compressed_tensors_wNa16(vllm_runner, wNa16_args):
assert qkv_proj.scheme.group_size == (-1
if group is None else group)
assert qkv_proj.weight_packed.dtype is torch.int32
assert qkv_proj.weight_scale.dtype is torch.float16
assert qkv_proj.scheme.pack_factor == pack_factor
llm.apply_model(check_model)