AWQ: Up to 2.66x higher throughput (#2566)

This commit is contained in:
Casper
2024-01-27 08:53:17 +01:00
committed by GitHub
parent 390b495ff3
commit beb89f68b4
4 changed files with 127 additions and 1 deletions

View File

@@ -51,6 +51,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
#ifndef USE_ROCM
// Quantization ops
ops.def("awq_gemm", &awq_gemm, "Quantized GEMM for AWQ");
ops.def("awq_dequantize", &awq_dequantize, "Dequantization for AWQ");
#endif
ops.def("gptq_gemm", &gptq_gemm, "Quantized GEMM for GPTQ");
ops.def("gptq_shuffle", &gptq_shuffle, "Post processing for GPTQ");