[CI] Prune Quantization Tests and skip compilation (#27038)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-10-16 17:26:35 -04:00
parent b3dda72c23
commit 01c977e96d
9 changed files with 62 additions and 134 deletions
--- a/tests/quantization/test_rtn.py
+++ b/tests/quantization/test_rtn.py
@@ -10,7 +10,6 @@ import pytest
 from tests.quantization.utils import is_quant_method_supported

 MODELS = [
-    "microsoft/Phi-3-mini-4k-instruct",  # dense model
    "ai21labs/Jamba-tiny-dev",  # MoE model
 ]

@@ -30,5 +29,7 @@ def test_model_rtn_startup(
    dtype: str,
    max_tokens: int,
 ) -> None:
-    with vllm_runner(model, dtype=dtype, quantization="rtn") as vllm_model:
+    with vllm_runner(
+        model, enforce_eager=True, dtype=dtype, quantization="rtn"
+    ) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens)