[Hardware][AMD][CI][Bugfix] Fix AMD Quantization test group (#31713)

Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
This commit is contained in:
Matt
2026-01-11 01:19:46 -06:00
committed by GitHub
parent 9103ed1696
commit bde57ab2ed
12 changed files with 114 additions and 52 deletions

View File

@@ -66,7 +66,7 @@ def test_cpu_offload_compressed_tensors(monkeypatch):
monkeypatch.setenv("VLLM_TEST_FORCE_LOAD_FORMAT", "auto")
# Test wNa16
compare_two_settings(
-"nm-testing/tinyllama-oneshot-w4a16-channel-v2",
+"nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16",
["--enforce_eager"],
["--enforce_eager", "--cpu-offload-gb", "1"],
max_wait_seconds=480,