Consolidate Intel Quantization Toolkit Integration in vLLM (#31716)

Signed-off-by: yiliu30 <yi4.liu@intel.com>
This commit is contained in:
Yi Liu
2026-01-14 15:11:30 +08:00
committed by GitHub
parent 6fa6e7ef0c
commit 50632adc58
10 changed files with 531 additions and 660 deletions

View File

@@ -26,9 +26,7 @@ MODELS = [
)
@pytest.mark.parametrize("model", MODELS)
def test_auto_round(vllm_runner, model):
    """Smoke-test greedy generation for an AutoRound-quantized model.

    Loads *model* through the ``vllm_runner`` fixture with
    ``enforce_eager=True`` (skips CUDA-graph capture for a faster test
    startup) and checks that a short greedy completion is produced.

    Args:
        vllm_runner: Project fixture that yields a context-managed LLM
            wrapper around vLLM.
        model: Model identifier supplied by the ``MODELS`` parametrization.
    """
    # NOTE: the deprecated ``allow_deprecated_quantization=True`` flag was
    # removed here — AutoRound is now a first-class quantization backend.
    with vllm_runner(model, enforce_eager=True) as llm:
        output = llm.generate_greedy(["The capital of France is"], max_tokens=8)
        # Only assert non-emptiness: exact tokens depend on model weights.
        assert output
        print(f"{output[0][1]}")