[distributed][rl] remove nccl cumem env var override (#24141)
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -13,24 +13,6 @@ logger = init_logger(__name__)
|
||||
# that interact with vllm workers.
|
||||
# they are executed whenever `import vllm` is called.
|
||||
|
||||
# Decide whether to force NCCL_CUMEM_ENABLE off for this process.
if os.environ.get('NCCL_CUMEM_ENABLE', '0') == '0':
    # The variable is unset or already '0'.
    #
    # Multi-node NVLink (typically GB200-NVL72 systems) needs
    # NCCL_CUMEM_ENABLE for NCCL to work. Detecting it properly would
    # require NVML APIs, which are too expensive to call here, so the
    # existence of /dev/nvidia-caps-imex-channels (used by multi-node
    # NVLink to communicate across nodes) serves as an approximation.
    # When that path exists the variable is left untouched: this still
    # costs some GPU memory, but the very fast cross-node NVLink
    # bandwidth makes it worthwhile.
    if not os.path.exists('/dev/nvidia-caps-imex-channels'):
        os.environ['NCCL_CUMEM_ENABLE'] = '0'
else:
    # A non-'0' value is already present in the environment: keep it and
    # only warn about the possible memory overhead.
    logger.warning(
        "NCCL_CUMEM_ENABLE is set to %s, skipping override. "
        "This may increase memory overhead with cudagraph+allreduce: "
        "https://github.com/NVIDIA/nccl/issues/1234",
        os.environ['NCCL_CUMEM_ENABLE'])

# Avoid unintentional CUDA initialization from torch.cuda.is_available();
# see https://github.com/vllm-project/vllm/pull/15951
os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
|
||||
|
||||
Reference in New Issue
Block a user