[Kernel] AQ AZP 3/4: Asymmetric quantization kernels (#7270)

2024-09-16 14:52:40 -04:00
parent 781e3b9a42
commit 5d73ae49d6
9 changed files with 339 additions and 57 deletions
--- a/csrc/ops.h
+++ b/csrc/ops.h
@@ -184,10 +184,12 @@ torch::Tensor marlin_qqq_gemm(torch::Tensor const& a,
 #endif

 void static_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
-                              torch::Tensor const& scale);
+                              torch::Tensor const& scale,
+                              c10::optional<torch::Tensor> const& azp);

 void dynamic_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
-                               torch::Tensor& scales);
+                               torch::Tensor& scales,
+                               c10::optional<torch::Tensor> const& azp);

 torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
                        torch::Tensor b_gptq_qzeros,