[CPU] Support CT W4A16 on CPU MP kernel (#38219)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
Li, Jiang
2026-03-27 14:15:28 +08:00
committed by GitHub
parent a8eab8f30d
commit becaed6ec8
2 changed files with 42 additions and 20 deletions

View File

@@ -11,6 +11,7 @@ MODELS = [
"TheBloke/TinyLlama-1.1B-Chat-v1.0-AWQ",
"TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", # with g_idx
"Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4", # without g_idx
"RedHatAI/Qwen3-1.7B-quantized.w4a16", # with zp
]
# Dtype names used alongside MODELS; only "bfloat16" is listed.
# NOTE(review): presumably consumed as a test parametrization axis — confirm against the test decorators.
DTYPE = ["bfloat16"]