[V1] LoRA Support (#10957)

Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com> Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
2025-02-06 23:02:51 +05:30
parent 8108ac841d
commit 467a96a541
16 changed files with 453 additions and 56 deletions
--- a/tests/lora/test_quant_model.py
+++ b/tests/lora/test_quant_model.py
@@ -70,6 +70,14 @@ def do_sample(llm: vllm.LLM,
    return generated_texts


+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines_lora):
+    # Autouse wrapper: requesting `run_with_both_engines_lora` here makes each
+    # test in this module run with both engines. It can be promoted to
+    # conftest.py to apply to every test in a package.
+    pass
+
+
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("tp_size", [1])
 def test_quant_model_lora(tinyllama_lora_files, num_gpus_available, model,