[Quantization][Deprecation] Remove Marlin 24 (#32688)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Robert Shaw
2026-01-28 07:54:59 -08:00
committed by GitHub
parent 8e5e40daf4
commit af9b69f977
20 changed files with 159 additions and 3161 deletions

View File

@@ -58,17 +58,6 @@ def models_list(*, all: bool = True, keywords: list[str] | None = None):
)
)
if is_quant_method_supported("gptq_marlin_24"):
TEST_MODELS.append(
(
"alexm-nm/tinyllama-24-marlin24-4bit-g128",
{
"quantization": "gptq_marlin_24",
"allow_deprecated_quantization": True,
},
)
)
if not current_platform.is_rocm() and is_quant_method_supported("awq"):
TEST_MODELS.append(
("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {"quantization": "AWQ"})