[Frontend] Improve Startup Failure UX (#7716)
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.openai.api_server import build_async_engine_client
|
||||
@@ -8,19 +10,20 @@ from vllm.utils import FlexibleArgumentParser
|
||||
@pytest.mark.asyncio
|
||||
async def test_mp_crash_detection():
|
||||
|
||||
with pytest.raises(RuntimeError) as excinfo:
|
||||
parser = FlexibleArgumentParser(
|
||||
description="vLLM's remote OpenAI server.")
|
||||
parser = make_arg_parser(parser)
|
||||
args = parser.parse_args([])
|
||||
# use an invalid tensor_parallel_size to trigger the
|
||||
# error in the server
|
||||
args.tensor_parallel_size = 65536
|
||||
parser = FlexibleArgumentParser(description="vLLM's remote OpenAI server.")
|
||||
parser = make_arg_parser(parser)
|
||||
args = parser.parse_args([])
|
||||
# use an invalid tensor_parallel_size to trigger the
|
||||
# error in the server
|
||||
args.tensor_parallel_size = 65536
|
||||
|
||||
async with build_async_engine_client(args):
|
||||
pass
|
||||
assert "The server process died before responding to the readiness probe"\
|
||||
in str(excinfo.value)
|
||||
start = time.perf_counter()
|
||||
async with build_async_engine_client(args):
|
||||
pass
|
||||
end = time.perf_counter()
|
||||
|
||||
assert end - start < 60, ("Expected vLLM to gracefully shutdown in <60s "
|
||||
"if there is an error in the startup.")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user