[Quantization][Deprecation] Remove Marlin 24 (#32688)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-01-28 07:54:59 -08:00
parent 8e5e40daf4
commit af9b69f977
20 changed files with 159 additions and 3161 deletions
--- a/tests/compile/fullgraph/test_full_graph.py
+++ b/tests/compile/fullgraph/test_full_graph.py
@@ -58,17 +58,6 @@ def models_list(*, all: bool = True, keywords: list[str] | None = None):
                )
            )

-        if is_quant_method_supported("gptq_marlin_24"):
-            TEST_MODELS.append(
-                (
-                    "alexm-nm/tinyllama-24-marlin24-4bit-g128",
-                    {
-                        "quantization": "gptq_marlin_24",
-                        "allow_deprecated_quantization": True,
-                    },
-                )
-            )
-
        if not current_platform.is_rocm() and is_quant_method_supported("awq"):
            TEST_MODELS.append(
                ("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {"quantization": "AWQ"})