[Kernel] Add GPTQv2 format support for low-bit or asymmetric quantization, by adapting gptq_gemm (#26092)
This commit is contained in:
@@ -26,4 +26,10 @@ def test_gptq_gemm_opcheck():
|
||||
idx = torch.empty((0,), device="cuda", dtype=torch.int32)
|
||||
use_exllama = True
|
||||
bit = 4
|
||||
opcheck(torch.ops._C.gptq_gemm, (a, weight, zeros, scales, idx, use_exllama, bit))
|
||||
# Test both GPTQv1 and GPTQv2 formats
|
||||
opcheck(
|
||||
torch.ops._C.gptq_gemm, (a, weight, zeros, scales, idx, use_exllama, True, bit)
|
||||
)
|
||||
opcheck(
|
||||
torch.ops._C.gptq_gemm, (a, weight, zeros, scales, idx, use_exllama, False, bit)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user