[CI Sprint] Quantization CI Cleanup (#24130)

Signed-off-by: Alex Yun <alexyun04@gmail.com>
This commit is contained in:
Alex
2025-11-18 08:21:48 -06:00
committed by GitHub
parent 184b12fdc6
commit f6aa122698
10 changed files with 32 additions and 26 deletions

View File

@@ -19,8 +19,8 @@ def test_cpu_offload_fp8():
# Test loading a quantized checkpoint
compare_two_settings(
"neuralmagic/Qwen2-1.5B-Instruct-FP8",
[],
["--cpu-offload-gb", "1"],
["--enforce_eager"],
["--enforce_eager", "--cpu-offload-gb", "1"],
max_wait_seconds=480,
)
@@ -35,8 +35,8 @@ def test_cpu_offload_gptq(monkeypatch):
# Test GPTQ Marlin
compare_two_settings(
"Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4",
[],
["--cpu-offload-gb", "1"],
["--enforce_eager"],
["--enforce_eager", "--cpu-offload-gb", "1"],
max_wait_seconds=480,
)
@@ -51,8 +51,8 @@ def test_cpu_offload_awq(monkeypatch):
# Test AWQ Marlin
compare_two_settings(
"Qwen/Qwen2-1.5B-Instruct-AWQ",
[],
["--cpu-offload-gb", "1"],
["--enforce_eager"],
["--enforce_eager", "--cpu-offload-gb", "1"],
max_wait_seconds=480,
)
@@ -67,7 +67,7 @@ def test_cpu_offload_compressed_tensors(monkeypatch):
# Test wNa16
compare_two_settings(
"nm-testing/tinyllama-oneshot-w4a16-channel-v2",
[],
["--cpu-offload-gb", "1"],
["--enforce_eager"],
["--enforce_eager", "--cpu-offload-gb", "1"],
max_wait_seconds=480,
)