[Bugfix] Fix tensorizer memory profiling bug during testing (#6881)
# isort: skip_file
import contextlib
import functools
import gc

import pytest
import ray
import torch

from vllm.distributed import (destroy_distributed_environment,
                              destroy_model_parallel)
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig


def cleanup():
    # Invoked from cleanup_fixture's teardown below, so distributed state,
    # Ray, and cached CUDA memory are released after each test.
    destroy_model_parallel()
    destroy_distributed_environment()
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    ray.shutdown()
    gc.collect()
    torch.cuda.empty_cache()


@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
    """Allow subdirectories to skip global cleanup by overriding this fixture.
    This can provide a ~10x speedup for non-GPU unit tests since they don't need
    to initialize torch.
    """
    return True
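
# NOTE: hypothetical sketch (not part of this commit): a subdirectory's own
# conftest.py could override the fixture above to opt out of the global
# cleanup for fast non-GPU unit tests:
#
#     @pytest.fixture()
#     def should_do_global_cleanup_after_test() -> bool:
#         return False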


def retry_until_skip(n):
    """Retry a test up to n times on AssertionError, freeing GPU memory
    between attempts; skip the test if the last attempt still fails."""

    def decorator_retry(func):

        @functools.wraps(func)
        def wrapper_retry(*args, **kwargs):
            for i in range(n):
                try:
                    return func(*args, **kwargs)
                except AssertionError:
                    gc.collect()
                    torch.cuda.empty_cache()
                    if i == n - 1:
                        pytest.skip("Skipping test after attempts..")

        return wrapper_retry

    return decorator_retry


@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
    yield
    if should_do_global_cleanup_after_test:
        cleanup()
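
# NOTE: hypothetical usage sketch (not from this commit): a test whose
# assertions can fail while GPU memory is still being reclaimed can opt in
# to the retry-then-skip behavior:
#
#     @retry_until_skip(3)
#     def test_tensorizer_outputs_match():
#         ...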


@pytest.fixture(autouse=True)
def tensorizer_config():
    config = TensorizerConfig(tensorizer_uri="vllm")
    return config
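
# NOTE: hypothetical sketch (not from this commit): the fixture above is
# autouse and returns the config object, so a test can also receive it by
# naming it as a parameter:
#
#     def test_uses_config(tensorizer_config):
#         assert tensorizer_config.tensorizer_uri == "vllm"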