[CI Sprint] Quantization CI Cleanup (#24130)

Signed-off-by: Alex Yun <alexyun04@gmail.com>
2025-11-18 08:21:48 -06:00
parent 184b12fdc6
commit f6aa122698
10 changed files with 32 additions and 26 deletions
--- a/tests/quantization/test_torchao.py
+++ b/tests/quantization/test_torchao.py
@@ -392,7 +392,7 @@ def test_opt_125m_int4wo_model_running_preshuffled_kernel_online_quant(
            assert not has_int4_preshuffled_tensor

        assert weight_attrs == [False, 1, 0, True]
-        output = llm.generate_greedy(["The capital of France is"], max_tokens=32)
+        output = llm.generate_greedy(["The capital of France is"], max_tokens=4)

        assert output