[MISC] Consolidate cleanup() and refactor offline_inference_with_prefix.py (#9510)
This commit is contained in:
@@ -1,27 +1,18 @@
|
||||
import contextlib
|
||||
import functools
|
||||
import gc
|
||||
from typing import Callable, TypeVar
|
||||
|
||||
import pytest
|
||||
import ray
|
||||
import torch
|
||||
from typing_extensions import ParamSpec
|
||||
|
||||
from vllm.distributed import (destroy_distributed_environment,
|
||||
destroy_model_parallel)
|
||||
from vllm.distributed import cleanup_dist_env_and_memory
|
||||
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def cleanup():
    """Tear down distributed state and reclaim GPU memory after every test.

    Runs automatically around each test in this directory (``autouse=True``).
    ``cleanup_dist_env_and_memory(shutdown_ray=True)`` consolidates the
    previous manual sequence — destroying model parallelism and the
    distributed environment, tearing down the process group, shutting down
    Ray, forcing a ``gc.collect()``, and emptying the CUDA cache — so
    repeating those calls here individually would perform the same teardown
    twice. The single helper call is the whole fixture body.
    """
    cleanup_dist_env_and_memory(shutdown_ray=True)
|
||||
|
||||
|
||||
# ParamSpec for typing decorator/wrapper helpers that forward *args/**kwargs;
# presumably consumed together with Callable/TypeVar below — the use site is
# not visible in this chunk (TODO confirm against the rest of the file).
_P = ParamSpec("_P")
|
||||
|
||||
Reference in New Issue
Block a user