[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)

2024-10-18 14:30:55 -07:00
parent 9bb10a7d27
commit d11bf435a0
20 changed files with 84 additions and 105 deletions
--- a/tests/lora/test_quant_model.py
+++ b/tests/lora/test_quant_model.py
@@ -6,11 +6,10 @@ from typing import List
 import pytest

 import vllm
+from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.lora.request import LoRARequest
 from vllm.utils import is_hip

-from .conftest import cleanup
-

@dataclass
 class ModelWithQuantization:
@@ -160,7 +159,7 @@ def test_quant_model_lora(tinyllama_lora_files, num_gpus_available, model,
    print("removing lora")

    del llm
-    cleanup()
+    cleanup_dist_env_and_memory()


@pytest.mark.parametrize("model", MODELS)
@@ -181,7 +180,7 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
    output_tp1 = do_sample(llm_tp1, tinyllama_lora_files, lora_id=1)

    del llm_tp1
-    cleanup()
+    cleanup_dist_env_and_memory()

    llm_tp2 = vllm.LLM(
        model=model.model_path,
@@ -194,6 +193,6 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
    output_tp2 = do_sample(llm_tp2, tinyllama_lora_files, lora_id=1)

    del llm_tp2
-    cleanup()
+    cleanup_dist_env_and_memory()

    assert output_tp1 == output_tp2