[CI] Prune Quantization Tests and skip compilation (#27038)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-10-16 17:26:35 -04:00
parent b3dda72c23
commit 01c977e96d
9 changed files with 62 additions and 134 deletions
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -18,7 +18,6 @@ from vllm.platforms import current_platform

 MODELS = [
    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
-    "nm-testing/Phi-3-mini-128k-instruct-FP8",
    "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV",
 ]

@@ -49,8 +48,6 @@ def test_model_load_and_run(


 KV_CACHE_MODELS = [
-    # Deprecated AutoFP8 format using .kv_scale
-    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
    # AutoFP8 format using separate .k_scale and .v_scale
    "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
 ]