[Misc] Enable V1 LoRA by default (#15320)

Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-04-01 04:53:56 -04:00
committed by GitHub
parent 30d6a015e0
commit 79455cf421
12 changed files with 125 additions and 87 deletions

View File

@@ -37,6 +37,14 @@ else:
]
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
    """Run every test in this module under both LoRA engines.

    Simple autouse wrapper around the ``run_with_both_engines_lora``
    fixture; it can be promoted up to conftest.py to apply to every
    test in a package.
    """
def do_sample(llm: vllm.LLM,
lora_path: str,
lora_id: int,
@@ -69,14 +77,6 @@ def do_sample(llm: vllm.LLM,
return generated_texts
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
    # Simple autouse wrapper to run both engines for each test.
    # This can be promoted up to conftest.py to run for every
    # test in a package.
    pass
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("tp_size", [1])
def test_quant_model_lora(tinyllama_lora_files, num_gpus_available, model,