[Core] Improve choice of Python multiprocessing method (#8823)
Signed-off-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
@@ -15,8 +15,8 @@ from vllm.model_executor.layers.sampler import SamplerOutput
|
||||
from vllm.sequence import ExecuteModelRequest
|
||||
from vllm.triton_utils import maybe_set_triton_cache_manager
|
||||
from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless,
|
||||
get_distributed_init_method, get_open_port,
|
||||
get_vllm_instance_id, make_async,
|
||||
cuda_is_initialized, get_distributed_init_method,
|
||||
get_open_port, get_vllm_instance_id, make_async,
|
||||
update_environment_variables)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -122,6 +122,13 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
|
||||
"CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
|
||||
})
|
||||
|
||||
if (cuda_is_initialized()
|
||||
and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") != "spawn"):
|
||||
logger.warning("CUDA was previously initialized. We must use "
|
||||
"the `spawn` multiprocessing start method. Setting "
|
||||
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
|
||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
cuda_device_count = cuda_device_count_stateless()
|
||||
# Use confusing message for more common TP-only case.
|
||||
assert tensor_parallel_size <= cuda_device_count, (
|
||||
|
||||
Reference in New Issue
Block a user