[Quantization][Refactor] Move CPU GPTQ kernel into MP linear (#31801)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
Signed-off-by: Li, Jiang <bigpyj64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-01-07 03:10:18 +08:00
parent c07163663d
commit 8becf146bd
9 changed files with 171 additions and 332 deletions
--- a/tests/quantization/test_cpu_wna16.py
+++ b/tests/quantization/test_cpu_wna16.py
@@ -10,6 +10,7 @@ if not current_platform.is_cpu():
 MODELS = [
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-AWQ",
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ",  # with g_idx
+    "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",  # without g_idx
 ]
 DTYPE = ["bfloat16"]