[Test] Make model tests run again and remove --forked from pytest (#3631)

Co-authored-by: Simon Mo <simon.mo@hey.com>
2024-03-29 13:06:40 +09:00
parent f342153b48
commit 26422e477b
12 changed files with 101 additions and 29 deletions
--- a/tests/models/test_marlin.py
+++ b/tests/models/test_marlin.py
@@ -8,7 +8,7 @@ Note: Marlin internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for Marlin. As a result, we re-run the test
 up to 3 times to see if we pass.

-Run `pytest tests/models/test_marlin.py --forked`.
+Run `pytest tests/models/test_marlin.py`.
 """

 from dataclasses import dataclass
@@ -63,7 +63,6 @@ def test_models(
    # Note: not sure why, but deleting just the model on Ada Lovelace
    #   does not free the GPU memory. On Ampere, deleting just the model
    #   frees the memory.
-    del marlin_model.model.llm_engine.driver_worker
    del marlin_model

    gptq_model = vllm_runner(model_pair.model_gptq, dtype=dtype)
@@ -74,7 +73,6 @@ def test_models(
    # Note: not sure why, but deleting just the model on Ada Lovelace
    #   does not free the GPU memory. On Ampere, deleting just the model
    #   frees the memory.
-    del gptq_model.model.llm_engine.driver_worker
    del gptq_model

    # loop through the prompts