[frontend] spawn engine process from api server process (#7484)
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
import asyncio
|
||||
import importlib
|
||||
import inspect
|
||||
import multiprocessing
|
||||
import re
|
||||
from argparse import Namespace
|
||||
from contextlib import asynccontextmanager
|
||||
from http import HTTPStatus
|
||||
from multiprocessing import Process
|
||||
from typing import AsyncIterator, Set
|
||||
|
||||
from fastapi import APIRouter, FastAPI, Request
|
||||
@@ -112,12 +112,15 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
|
||||
rpc_path)
|
||||
|
||||
# Start RPCServer in separate process (holds the AsyncLLMEngine).
|
||||
rpc_server_process = Process(target=run_rpc_server,
|
||||
args=(engine_args,
|
||||
UsageContext.OPENAI_API_SERVER,
|
||||
rpc_path))
|
||||
context = multiprocessing.get_context("spawn")
|
||||
# the current process might have a CUDA context,
|
||||
# so we need to spawn a new process
|
||||
rpc_server_process = context.Process(
|
||||
target=run_rpc_server,
|
||||
args=(engine_args, UsageContext.OPENAI_API_SERVER, rpc_path))
|
||||
rpc_server_process.start()
|
||||
|
||||
logger.info("Started engine process with PID %d",
|
||||
rpc_server_process.pid)
|
||||
# Build RPCClient, which conforms to AsyncEngineClient Protocol.
|
||||
async_engine_client = AsyncEngineRPCClient(rpc_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user