[Bugfix] set VLLM_WORKER_MULTIPROC_METHOD=spawn for vllm.entrypoints.openai.api_server (#15700)

Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com>
This commit is contained in:
Jinzhen Lin
2025-03-29 12:12:26 +08:00
committed by GitHub
parent 8427f70493
commit 5b800f0932
3 changed files with 31 additions and 27 deletions

View File

@@ -82,7 +82,8 @@ from vllm.entrypoints.openai.serving_tokenization import (
from vllm.entrypoints.openai.serving_transcription import (
OpenAIServingTranscription)
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.entrypoints.utils import load_aware_call, with_cancellation
from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
with_cancellation)
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.transformers_utils.config import (
@@ -1106,6 +1107,7 @@ if __name__ == "__main__":
# NOTE(simon):
# This section should be in sync with vllm/entrypoints/cli/main.py for CLI
# entrypoints.
cli_env_setup()
parser = FlexibleArgumentParser(
description="vLLM OpenAI-Compatible RESTful API server.")
parser = make_arg_parser(parser)