Add Support for 2/3/8-bit GPTQ Quantization Models (#2330)

This commit is contained in:
CHU Tianxiang
2024-02-29 13:52:23 +08:00
committed by GitHub
parent 929b4f2973
commit 01a5d18a53
8 changed files with 1663 additions and 156 deletions

View File

@@ -98,11 +98,13 @@ torch::Tensor gptq_gemm(
torch::Tensor b_gptq_qzeros,
torch::Tensor b_gptq_scales,
torch::Tensor b_g_idx,
-bool use_exllama);
+bool use_exllama,
+int bit);
void gptq_shuffle(
torch::Tensor q_weight,
-torch::Tensor q_perm);
+torch::Tensor q_perm,
+int bit);
void moe_align_block_size(
torch::Tensor topk_ids,