[Bugfix][Frontend] Fix Issues Under High Load With zeromq Frontend (#7394)
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
@@ -135,6 +135,12 @@ async def build_async_engine_client(
|
||||
logger.info("Multiprocessing frontend to use %s for RPC Path.",
|
||||
rpc_path)
|
||||
|
||||
# Build RPCClient, which conforms to AsyncEngineClient Protocol.
|
||||
# NOTE: Actually, this is not true yet. We still need to support
|
||||
# embedding models via RPC (see TODO above)
|
||||
rpc_client = AsyncEngineRPCClient(rpc_path)
|
||||
async_engine_client = rpc_client # type: ignore
|
||||
|
||||
# Start RPCServer in separate process (holds the AsyncLLMEngine).
|
||||
context = multiprocessing.get_context("spawn")
|
||||
# the current process might have CUDA context,
|
||||
@@ -145,11 +151,6 @@ async def build_async_engine_client(
|
||||
rpc_server_process.start()
|
||||
logger.info("Started engine process with PID %d",
|
||||
rpc_server_process.pid)
|
||||
# Build RPCClient, which conforms to AsyncEngineClient Protocol.
|
||||
# NOTE: Actually, this is not true yet. We still need to support
|
||||
# embedding models via RPC (see TODO above)
|
||||
rpc_client = AsyncEngineRPCClient(rpc_path)
|
||||
async_engine_client = rpc_client # type: ignore
|
||||
|
||||
try:
|
||||
while True:
|
||||
|
||||
Reference in New Issue
Block a user