[CI Sprint] Quantization CI Cleanup (#24130)

Signed-off-by: Alex Yun <alexyun04@gmail.com>
This commit is contained in:
Alex
2025-11-18 08:21:48 -06:00
committed by GitHub
parent 184b12fdc6
commit f6aa122698
10 changed files with 32 additions and 26 deletions

View File

@@ -392,7 +392,7 @@ def test_opt_125m_int4wo_model_running_preshuffled_kernel_online_quant(
assert not has_int4_preshuffled_tensor
assert weight_attrs == [False, 1, 0, True]
- output = llm.generate_greedy(["The capital of France is"], max_tokens=32)
+ output = llm.generate_greedy(["The capital of France is"], max_tokens=4)
assert output