[Frontend] Improve Startup Failure UX (#7716)

This commit is contained in:
Robert Shaw
2024-08-21 15:53:01 -04:00
committed by GitHub
parent 91f4522cbf
commit 970dfdc01d
2 changed files with 36 additions and 18 deletions

View File

@@ -1,3 +1,5 @@
import time
import pytest
from vllm.entrypoints.openai.api_server import build_async_engine_client
@@ -8,19 +10,20 @@ from vllm.utils import FlexibleArgumentParser
@pytest.mark.asyncio
async def test_mp_crash_detection():
with pytest.raises(RuntimeError) as excinfo:
parser = FlexibleArgumentParser(
description="vLLM's remote OpenAI server.")
parser = make_arg_parser(parser)
args = parser.parse_args([])
# use an invalid tensor_parallel_size to trigger the
# error in the server
args.tensor_parallel_size = 65536
parser = FlexibleArgumentParser(description="vLLM's remote OpenAI server.")
parser = make_arg_parser(parser)
args = parser.parse_args([])
# use an invalid tensor_parallel_size to trigger the
# error in the server
args.tensor_parallel_size = 65536
async with build_async_engine_client(args):
pass
assert "The server process died before responding to the readiness probe"\
in str(excinfo.value)
start = time.perf_counter()
async with build_async_engine_client(args):
pass
end = time.perf_counter()
assert end - start < 60, ("Expected vLLM to gracefully shutdown in <60s "
"if there is an error in the startup.")
@pytest.mark.asyncio