[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)

2024-10-18 14:30:55 -07:00
parent 9bb10a7d27
commit d11bf435a0
20 changed files with 84 additions and 105 deletions
--- a/tests/async_engine/test_async_llm_engine.py
+++ b/tests/async_engine/test_async_llm_engine.py
@@ -12,11 +12,11 @@ import torch

 from vllm import SamplingParams
 from vllm.config import ParallelConfig
+from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
 from vllm.outputs import RequestOutput as RealRequestOutput
 from vllm.sampling_params import RequestOutputKind

-from ..conftest import cleanup
 from ..utils import wait_for_gpu_memory_to_clear


@@ -157,7 +157,7 @@ async def async_engine():
        engine.shutdown_background_loop()
        del engine
        await asyncio.sleep(0.1)
-        cleanup()
+        cleanup_dist_env_and_memory()


@pytest.fixture()