[Test] Make model tests run again and remove --forked from pytest (#3631)

Co-authored-by: Simon Mo <simon.mo@hey.com>
This commit is contained in:
SangBin Cho
2024-03-29 13:06:40 +09:00
committed by GitHub
parent f342153b48
commit 26422e477b
12 changed files with 101 additions and 29 deletions

View File

@@ -8,7 +8,7 @@ Note: Marlin internally uses locks to synchronize the threads. This can
result in very slight nondeterminism for Marlin. As a result, we re-run the test
up to 3 times to see if we pass.
Run `pytest tests/models/test_marlin.py --forked`.
Run `pytest tests/models/test_marlin.py`.
"""
from dataclasses import dataclass
@@ -63,7 +63,6 @@ def test_models(
# Note: not sure why, but deleting just the model on Ada Lovelace
# does not free the GPU memory. On Ampere, deleting just the model
# frees the memory.
del marlin_model.model.llm_engine.driver_worker
del marlin_model
gptq_model = vllm_runner(model_pair.model_gptq, dtype=dtype)
@@ -74,7 +73,6 @@ def test_models(
# Note: not sure why, but deleting just the model on Ada Lovelace
# does not free the GPU memory. On Ampere, deleting just the model
# frees the memory.
del gptq_model.model.llm_engine.driver_worker
del gptq_model
# loop through the prompts