[2/N] API Server: Avoid ulimit footgun (#11530)

This commit is contained in:
Robert Shaw
2024-12-26 18:43:05 -05:00
committed by GitHub
parent 2072924d14
commit 55fb97f7bd
3 changed files with 26 additions and 2 deletions

View File

@@ -68,7 +68,7 @@ from vllm.entrypoints.utils import with_cancellation
from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address)
is_valid_ipv6_address, set_ulimit)
from vllm.version import __version__ as VLLM_VERSION
TIMEOUT_KEEP_ALIVE = 5 # seconds
@@ -727,6 +727,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
sock_addr = (args.host or "", args.port)
sock = create_server_socket(sock_addr)
# Workaround for a footgun: uvicorn drops requests when too many
# concurrent requests are active.
set_ulimit()
def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing
raise KeyboardInterrupt("terminated")