Consolidate Intel Quantization Toolkit Integration in vLLM (#31716)
Signed-off-by: yiliu30 <yi4.liu@intel.com>
This commit is contained in:
@@ -26,9 +26,7 @@ MODELS = [
|
||||
)
|
||||
@pytest.mark.parametrize("model", MODELS)
def test_auto_round(vllm_runner, model):
    """Smoke-test an AutoRound-quantized model end to end.

    Loads *model* through the ``vllm_runner`` fixture with eager execution
    (no CUDA-graph capture) and ``allow_deprecated_quantization=True`` so
    the deprecated AutoRound quantization path is still accepted, then
    checks that greedy generation produces a non-empty result.

    Args:
        vllm_runner: pytest fixture yielding a context-managed vLLM runner.
        model: model identifier, parametrized over ``MODELS``.
    """
    with vllm_runner(
        model, enforce_eager=True, allow_deprecated_quantization=True
    ) as llm:
        output = llm.generate_greedy(["The capital of France is"], max_tokens=8)
        # A falsy/empty output means generation silently failed.
        assert output
        print(f"{output[0][1]}")
|
||||
|
||||
Reference in New Issue
Block a user