[Bugfix] Set VLLM_WORKER_MULTIPROC_METHOD=spawn for vllm.entrypoints.openai.api_server (#15700)
Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com>
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# The CLI entrypoint to vLLM.
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
|
||||
@@ -9,11 +8,9 @@ import vllm.entrypoints.cli.benchmark.main
|
||||
import vllm.entrypoints.cli.openai
|
||||
import vllm.entrypoints.cli.serve
|
||||
import vllm.version
|
||||
from vllm.logger import init_logger
|
||||
from vllm.entrypoints.utils import cli_env_setup
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
CMD_MODULES = [
|
||||
vllm.entrypoints.cli.openai,
|
||||
vllm.entrypoints.cli.serve,
|
||||
@@ -30,29 +27,8 @@ def register_signal_handlers():
|
||||
signal.signal(signal.SIGTSTP, signal_handler)
|
||||
|
||||
|
||||
def env_setup():
    """Default the worker multiprocessing start method to ``spawn``.

    ``spawn`` is the safest start method: the default ``fork`` method is not
    compatible with some accelerators, and Python's default is changing in
    future versions, so it is selected explicitly where possible.

    The default is applied only from the CLI entrypoint. Switching to
    ``spawn`` could break existing code that uses vLLM as a library, because
    ``spawn`` causes unexpected behavior when the calling code is not
    protected by ``if __name__ == "__main__":``.

    A value already present in ``VLLM_WORKER_MULTIPROC_METHOD`` is left
    untouched.

    References:
    - https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    - https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
    - https://pytorch.org/docs/stable/multiprocessing.html#sharing-cuda-tensors
    - https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html?highlight=multiprocessing#torch-multiprocessing-for-dataloaders
    """
    # Respect an explicit choice made by the user's environment.
    if "VLLM_WORKER_MULTIPROC_METHOD" in os.environ:
        return

    logger.debug("Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'")
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
|
||||
def main():
|
||||
env_setup()
|
||||
cli_env_setup()
|
||||
|
||||
parser = FlexibleArgumentParser(description="vLLM CLI")
|
||||
parser.add_argument('-v',
|
||||
|
||||
Reference in New Issue
Block a user