[V1] V1 Enablement Oracle (#13726)
Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
@@ -101,8 +101,10 @@ def test_register_quantization_config():
|
||||
argvalues=[
|
||||
"meta-llama/Llama-3.2-1B-Instruct",
|
||||
])
|
||||
def test_custom_quant(vllm_runner, model):
|
||||
def test_custom_quant(vllm_runner, model, monkeypatch):
|
||||
"""Test infer with the custom quantization method."""
|
||||
# vllm_runner.apply_model() relies on V0 internals.
|
||||
monkeypatch.setenv("VLLM_USE_V1", "0")
|
||||
with vllm_runner(model_name=model,
|
||||
quantization="custom_quant",
|
||||
enforce_eager=True) as llm:
|
||||
|
||||
Reference in New Issue
Block a user