[Frontend] Kill the server on engine death (#6594)

Signed-off-by: Joe Runde <joe@joerun.de>
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2024-08-08 10:47:48 -06:00
committed by GitHub
parent 5fb4a3f678
commit 21b9c49aa3
8 changed files with 136 additions and 14 deletions

View File

@@ -96,14 +96,17 @@ class AsyncEngineRPCServer:
async def abort(self, identity, request: RPCAbortRequest):
    """Abort request and notify the client of success.

    The abort is best-effort: if the engine raises while aborting, the
    failure is logged as a warning (with traceback) and the success
    confirmation is still sent to the client.

    Args:
        identity: Frame sent back unchanged as the first part of the
            reply (presumably the routing identity of the requesting
            client -- confirm against the socket type in use).
        request: Abort request; its ``request_id`` names the request to
            abort in the engine.
    """
    try:
        # Abort the request in the llm engine.
        await self.engine.abort(request.request_id)
    except Exception:
        # Keep the cause of the failure in the log instead of dropping it.
        logger.warning("Failed to abort request %s",
                       request.request_id,
                       exc_info=True)
    finally:
        # Send confirmation to the client, whether or not the abort worked.
        await self.socket.send_multipart([
            identity,
            cloudpickle.dumps(VLLM_RPC_SUCCESS_STR),
        ])
async def generate(self, identity, generate_request: RPCGenerateRequest):
try: