[Kernel] AQ AZP 3/4: Asymmetric quantization kernels (#7270)

This commit is contained in:
Luka Govedič
2024-09-16 14:52:40 -04:00
committed by GitHub
parent 781e3b9a42
commit 5d73ae49d6
9 changed files with 339 additions and 57 deletions

View File

@@ -184,10 +184,12 @@ torch::Tensor marlin_qqq_gemm(torch::Tensor const& a,
 #endif
 void static_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
-                              torch::Tensor const& scale);
+                              torch::Tensor const& scale,
+                              c10::optional<torch::Tensor> const& azp);
 void dynamic_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
-                               torch::Tensor& scales);
+                               torch::Tensor& scales,
+                               c10::optional<torch::Tensor> const& azp);
 torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
                         torch::Tensor b_gptq_qzeros,