[Kernel/Quant] Remove the original marlin format and qqq (#23204)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-08-20 15:13:36 -04:00
parent ebe56a0064
commit 0cdbf5e61c
26 changed files with 92 additions and 3698 deletions
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@@ -53,12 +53,6 @@ def models_list(*, all: bool = True, keywords: Optional[list[str]] = None):
                "quantization": "gptq_marlin_24"
            }))

-        if is_quant_method_supported("marlin"):
-            TEST_MODELS.append(
-                ("robertgshaw2/TinyLlama-1.1B-Chat-v1.0-g128-marlin", {
-                    "quantization": "marlin"
-                }))
-
        if not current_platform.is_rocm() and is_quant_method_supported("awq"):
            TEST_MODELS.append(("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {
                "quantization": "AWQ"