[Bugfix] Fix tensorizer memory profiling bug during testing (#6881)

This commit is contained in:
Sanger Steel
2024-07-30 14:48:50 -04:00
committed by GitHub
parent 5895b24677
commit 052b6f8ca4
2 changed files with 110 additions and 94 deletions

View File

@@ -1,6 +1,5 @@
# isort: skip_file
import contextlib
import functools
import gc
import pytest
@@ -12,34 +11,38 @@ from vllm.distributed import (destroy_distributed_environment,
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
@pytest.fixture(autouse=True)
def cleanup():
    """Tear down distributed/GPU state between tests.

    Destroys the model-parallel and distributed environments, releases the
    torch process group (suppressing the AssertionError raised when no group
    was ever initialized), frees cached GPU memory, and finally shuts down
    Ray.  `ray.shutdown()` runs last so Ray workers are gone only after the
    GPU caches have been cleared, keeping memory profiling accurate.
    """
    destroy_model_parallel()
    destroy_distributed_environment()
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    gc.collect()
    torch.cuda.empty_cache()
    ray.shutdown()
@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
    """Allow subdirectories to skip global cleanup by overriding this fixture.
    This can provide a ~10x speedup for non-GPU unit tests since they don't need
    to initialize torch.
    """
    return True


def retry_until_skip(n):
    """Decorator factory: retry a test up to `n` times on AssertionError.

    After each failed attempt the GPU caches are cleared (gc + CUDA empty
    cache) before retrying.  If the final attempt also fails, the test is
    skipped instead of reported as a failure.

    Args:
        n: maximum number of attempts before skipping.
    """

    def decorator_retry(func):

        @functools.wraps(func)
        def wrapper_retry(*args, **kwargs):
            for i in range(n):
                try:
                    return func(*args, **kwargs)
                except AssertionError:
                    # Free memory so the retry starts from a clean slate.
                    gc.collect()
                    torch.cuda.empty_cache()
                    if i == n - 1:
                        pytest.skip("Skipping test after attempts..")

        return wrapper_retry

    return decorator_retry


@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
    """Run the global `cleanup()` after each test unless opted out."""
    yield
    if should_do_global_cleanup_after_test:
        cleanup()
@pytest.fixture(autouse=True)
def tensorizer_config():
    """Provide a minimal TensorizerConfig pointing at the "vllm" URI."""
    config = TensorizerConfig(tensorizer_uri="vllm")
    return config