[CI] Prune tests/models/decoder_only/language/* tests (#9940)

Signed-off-by: mgoin <michael@neuralmagic.com>
2024-11-05 16:02:23 -05:00
parent b9c64c0ca7
commit 02462465ea
9 changed files with 70 additions and 270 deletions
--- a/tests/models/decoder_only/language/test_gptq_marlin.py
+++ b/tests/models/decoder_only/language/test_gptq_marlin.py
@@ -22,24 +22,11 @@ os.environ["TOKENIZERS_PARALLELISM"] = "true"
 MAX_MODEL_LEN = 1024

 MODELS = [
-    # act_order==False, group_size=channelwise
-    ("robertgshaw2/zephyr-7b-beta-channelwise-gptq", "main"),
-    # act_order==False, group_size=128
-    ("TheBloke/Llama-2-7B-GPTQ", "main"),
-
    # act_order==True, group_size=128
    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "main"),
-    # act_order==True, group_size=64
-    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "gptq-4bit-64g-actorder_True"),
-    # act_order==True, group_size=32
-    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "gptq-4bit-32g-actorder_True"),

    # 8-bit, act_order==True, group_size=channelwise
    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "gptq-8bit--1g-actorder_True"),
-    # 8-bit, act_order==True, group_size=128
-    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "gptq-8bit-128g-actorder_True"),
-    # 8-bit, act_order==True, group_size=32
-    ("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", "gptq-8bit-32g-actorder_True"),

    # 4-bit, act_order==True, group_size=128
    ("TechxGenus/gemma-1.1-2b-it-GPTQ", "main")