[V1] [4/N] API Server: ZMQ/MP Utilities (#11541)

This commit is contained in:
Robert Shaw
2024-12-27 20:45:08 -05:00
committed by GitHub
parent a60731247f
commit df04dffade
12 changed files with 242 additions and 210 deletions

View File

@@ -68,7 +68,7 @@ from vllm.entrypoints.utils import with_cancellation
from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address, set_ulimit)
is_valid_ipv6_address, kill_process_tree, set_ulimit)
from vllm.version import __version__ as VLLM_VERSION
TIMEOUT_KEEP_ALIVE = 5 # seconds
@@ -737,6 +737,15 @@ async def run_server(args, **uvicorn_kwargs) -> None:
signal.signal(signal.SIGTERM, signal_handler)
# The child processes will send SIGQUIT to this process when
# any error happens. This process then cleans up the whole tree.
# TODO(rob): move this into AsyncLLM.__init__ once we remove
# the context manager below.
def sigquit_handler(signum, frame):
kill_process_tree(os.getpid())
signal.signal(signal.SIGQUIT, sigquit_handler)
async with build_async_engine_client(args) as engine_client:
app = build_app(args)