From 1cab43c2d2172459987652a7fbf922c6611a71fa Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Thu, 3 Apr 2025 01:02:58 +0800
Subject: [PATCH] [misc] instruct pytorch to use nvml-based cuda check (#15951)

Signed-off-by: youkaichao
---
 vllm/__init__.py     | 20 ++++----------------
 vllm/env_override.py | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 16 deletions(-)
 create mode 100644 vllm/env_override.py

diff --git a/vllm/__init__.py b/vllm/__init__.py
index 457780824..52022fb8f 100644
--- a/vllm/__init__.py
+++ b/vllm/__init__.py
@@ -4,9 +4,10 @@
 # version library first. Such assumption is critical for some customization.
 from .version import __version__, __version_tuple__ # isort:skip
 
-import os
-
-import torch
+# The environment variables override should be imported before any other
+# modules to ensure that the environment variables are set before any
+# other modules are imported.
+import vllm.env_override # isort:skip # noqa: F401
 
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -23,19 +24,6 @@ from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 
-# set some common config/environment variables that should be set
-# for all processes created by vllm and all processes
-# that interact with vllm workers.
-# they are executed whenever `import vllm` is called.
-
-# see https://github.com/NVIDIA/nccl/issues/1234
-os.environ['NCCL_CUMEM_ENABLE'] = '0'
-
-# see https://github.com/vllm-project/vllm/issues/10480
-os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
-# see https://github.com/vllm-project/vllm/issues/10619
-torch._inductor.config.compile_threads = 1
-
 __all__ = [
     "__version__",
     "__version_tuple__",
diff --git a/vllm/env_override.py b/vllm/env_override.py
new file mode 100644
index 000000000..0fa5b70c2
--- /dev/null
+++ b/vllm/env_override.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+import torch
+
+# set some common config/environment variables that should be set
+# for all processes created by vllm and all processes
+# that interact with vllm workers.
+# they are executed whenever `import vllm` is called.
+
+# see https://github.com/NVIDIA/nccl/issues/1234
+os.environ['NCCL_CUMEM_ENABLE'] = '0'
+
+# see https://github.com/vllm-project/vllm/pull/15951
+# it avoids unintentional cuda initialization from torch.cuda.is_available()
+os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
+
+# see https://github.com/vllm-project/vllm/issues/10480
+os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
+# see https://github.com/vllm-project/vllm/issues/10619
+torch._inductor.config.compile_threads = 1