[CI Sprint] Quantization CI Cleanup (#24130)

Signed-off-by: Alex Yun <alexyun04@gmail.com>
This commit is contained in:
Alex
2025-11-18 08:21:48 -06:00
committed by GitHub
parent 184b12fdc6
commit f6aa122698
10 changed files with 32 additions and 26 deletions

View File

@@ -26,7 +26,7 @@ DTYPE = ["bfloat16"]
# Parametrized over every entry of MODELS and DTYPE: runs one greedy generation
# for the prompt "The capital of France is" and checks that a result comes back.
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", DTYPE)
def test_ipex_quant(vllm_runner, model, dtype):
# NOTE(review): this span is a diff hunk whose +/- markers and indentation were
# lost. The hunk header counts 7 lines per side but 9 lines are shown, so the
# next two lines are presumably the REMOVED (pre-commit) version and the two
# after them the ADDED replacement -- confirm against the repository before
# treating this text as runnable code.
# Presumably pre-commit: runner built with only model/dtype, 32-token generation.
with vllm_runner(model, dtype=dtype) as llm:
output = llm.generate_greedy(["The capital of France is"], max_tokens=32)
# Presumably post-commit: additionally passes enforce_eager=True to the runner
# and cuts the generation length from 32 to 4 tokens.
with vllm_runner(model, dtype=dtype, enforce_eager=True) as llm:
output = llm.generate_greedy(["The capital of France is"], max_tokens=4)
# Only truthiness of the result is asserted; the generated text is printed for
# inspection, not compared against an expected string.
assert output
print(output)