[frontend] spawn engine process from api server process (#7484)

This commit is contained in:
youkaichao
2024-08-13 15:40:17 -07:00
committed by GitHub
parent c5c7768264
commit 33e5d7e6b6
4 changed files with 51 additions and 49 deletions

View File

@@ -1,11 +1,11 @@
import asyncio
import importlib
import inspect
import multiprocessing
import re
from argparse import Namespace
from contextlib import asynccontextmanager
from http import HTTPStatus
from multiprocessing import Process
from typing import AsyncIterator, Set
from fastapi import APIRouter, FastAPI, Request
@@ -112,12 +112,15 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
rpc_path)
# Start the RPCServer in a separate process (it holds the AsyncLLMEngine).
rpc_server_process = Process(target=run_rpc_server,
args=(engine_args,
UsageContext.OPENAI_API_SERVER,
rpc_path))
context = multiprocessing.get_context("spawn")
# The current process might already have a CUDA context,
# so we need to spawn (rather than fork) a new process.
rpc_server_process = context.Process(
target=run_rpc_server,
args=(engine_args, UsageContext.OPENAI_API_SERVER, rpc_path))
rpc_server_process.start()
logger.info("Started engine process with PID %d",
rpc_server_process.pid)
# Build RPCClient, which conforms to AsyncEngineClient Protocol.
async_engine_client = AsyncEngineRPCClient(rpc_path)