[frontend] spawn engine process from api server process (#7484)

2024-08-13 15:40:17 -07:00
parent c5c7768264
commit 33e5d7e6b6
4 changed files with 51 additions and 49 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1,11 +1,11 @@
 import asyncio
 import importlib
 import inspect
+import multiprocessing
 import re
 from argparse import Namespace
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from multiprocessing import Process
 from typing import AsyncIterator, Set

 from fastapi import APIRouter, FastAPI, Request
@@ -112,12 +112,15 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
                    rpc_path)

        # Start RPCServer in separate process (holds the AsyncLLMEngine).
-        rpc_server_process = Process(target=run_rpc_server,
-                                     args=(engine_args,
-                                           UsageContext.OPENAI_API_SERVER,
-                                           rpc_path))
+        context = multiprocessing.get_context("spawn")
+        # the current process might have CUDA context,
+        # so we need to spawn a new process
+        rpc_server_process = context.Process(
+            target=run_rpc_server,
+            args=(engine_args, UsageContext.OPENAI_API_SERVER, rpc_path))
        rpc_server_process.start()
-
+        logger.info("Started engine process with PID %d",
+                    rpc_server_process.pid)
        # Build RPCClient, which conforms to AsyncEngineClient Protocol.
        async_engine_client = AsyncEngineRPCClient(rpc_path)