[CI] Prune Quantization Tests and skip compilation (#27038)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-10-16 17:26:35 -04:00
parent b3dda72c23
commit 01c977e96d
9 changed files with 62 additions and 134 deletions
--- a/tests/quantization/test_lm_head.py
+++ b/tests/quantization/test_lm_head.py
@@ -31,7 +31,9 @@ def test_lm_head(
 ) -> None:
    # `LLM.apply_model` requires pickling a function.
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
-    with vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) as vllm_model:
+    with vllm_runner(
+        model_id, dtype=torch.float16, max_model_len=2048, enforce_eager=True
+    ) as vllm_model:

        def check_model(model):
            lm_head_layer = model.lm_head