[misc] instruct pytorch to use nvml-based cuda check (#15951)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-04-03 01:02:58 +08:00
parent 8bd651b318
commit 1cab43c2d2
2 changed files with 25 additions and 16 deletions
--- a/vllm/env_override.py
+++ b/vllm/env_override.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+import torch
+
+# set some common config/environment variables that should be set
+# for all processes created by vllm and all processes
+# that interact with vllm workers.
+# they are executed whenever `import vllm` is called.
+
+# see https://github.com/NVIDIA/nccl/issues/1234
+os.environ['NCCL_CUMEM_ENABLE'] = '0'
+
+# see https://github.com/vllm-project/vllm/pull/15951
+# it avoids unintentional cuda initialization from torch.cuda.is_available()
+os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
+
+# see https://github.com/vllm-project/vllm/issues/10480
+os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
+# see https://github.com/vllm-project/vllm/issues/10619
+torch._inductor.config.compile_threads = 1