[CI Sprint] Quantization CI Cleanup (#24130)

Signed-off-by: Alex Yun <alexyun04@gmail.com>
2025-11-18 08:21:48 -06:00
parent 184b12fdc6
commit f6aa122698
10 changed files with 32 additions and 26 deletions
--- a/tests/quantization/test_ptpc_fp8.py
+++ b/tests/quantization/test_ptpc_fp8.py
@@ -38,6 +38,7 @@ def test_ptpc_fp8_rocm(vllm_runner, dtype: str, kv_cache_dtype: str) -> None:
            "facebook/opt-125m",
            dtype=dtype,
            quantization="ptpc_fp8",
+            enforce_eager=True,
            kv_cache_dtype=kv_cache_dtype,
        )
    except AssertionError as e:
@@ -65,5 +66,5 @@ def test_ptpc_fp8_rocm(vllm_runner, dtype: str, kv_cache_dtype: str) -> None:

        llm.apply_model(check_model)

-        output = llm.generate_greedy("Hello my name is", max_tokens=20)
+        output = llm.generate_greedy("Hello my name is", max_tokens=4)
        assert output