[Bugfix][Kernel] Add IQ1_M quantization implementation to GGUF kernel (#8357)

This commit is contained in:
Isotr0py
2024-09-16 06:51:44 +08:00
committed by GitHub
parent 3724d5f6b5
commit fc990f9795
8 changed files with 547 additions and 161 deletions

View File

@@ -166,6 +166,11 @@ torch::Tensor ggml_mul_mat_vec_a8(torch::Tensor W, // quant weight
(void*)quant_X.data_ptr(),
(half*)Y.data_ptr(), col, row, stream);
break;
case 29:
mul_mat_vec_iq1_m_q8_1_cuda((void*)W.data_ptr(),
(void*)quant_X.data_ptr(),
(half*)Y.data_ptr(), col, row, stream);
break;
}
return Y;
}