[Hardware][AMD][CI][Bugfix] Fix AMD Quantization test group (#31713)

Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
2026-01-11 01:19:46 -06:00
parent 9103ed1696
commit bde57ab2ed
12 changed files with 114 additions and 52 deletions
--- a/tests/quantization/test_configs.py
+++ b/tests/quantization/test_configs.py
@@ -10,6 +10,7 @@ from dataclasses import dataclass
 import pytest

 from vllm.config import ModelConfig
+from vllm.platforms import current_platform


@dataclass
@@ -23,20 +24,44 @@ MODEL_ARG_EXPTYPES = [
    # AUTOGPTQ
    # compat: autogptq <=0.7.1 is_marlin_format: bool
    # Model Serialized in Exllama Format.
-    ("TheBloke/Llama-2-7B-Chat-GPTQ", None, "gptq_marlin"),
-    ("TheBloke/Llama-2-7B-Chat-GPTQ", "marlin", "gptq_marlin"),
+    (
+        "TheBloke/Llama-2-7B-Chat-GPTQ",
+        None,
+        "gptq_marlin" if current_platform.is_cuda() else "gptq",
+    ),
+    (
+        "TheBloke/Llama-2-7B-Chat-GPTQ",
+        "marlin",
+        "gptq_marlin" if current_platform.is_cuda() else "ERROR",
+    ),
    ("TheBloke/Llama-2-7B-Chat-GPTQ", "gptq", "gptq"),
    ("TheBloke/Llama-2-7B-Chat-GPTQ", "awq", "ERROR"),
    # compat: autogptq >=0.8.0 use checkpoint_format: str
    # Model Serialized in Exllama Format.
-    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", None, "gptq_marlin"),
-    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "marlin", "gptq_marlin"),
+    (
+        "LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit",
+        None,
+        "gptq_marlin" if current_platform.is_cuda() else "gptq",
+    ),
+    (
+        "LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit",
+        "marlin",
+        "gptq_marlin" if current_platform.is_cuda() else "ERROR",
+    ),
    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "gptq", "gptq"),
    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "awq", "ERROR"),
    # AUTOAWQ
-    ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", None, "awq_marlin"),
+    (
+        "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ",
+        None,
+        "awq_marlin" if current_platform.is_cuda() else "awq",
+    ),
    ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", "awq", "awq"),
-    ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", "marlin", "awq_marlin"),
+    (
+        "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ",
+        "marlin",
+        "awq_marlin" if current_platform.is_cuda() else "ERROR",
+    ),
    ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", "gptq", "ERROR"),
 ]