[core] add sleep and wake up endpoint and v1 support (#12987)

Signed-off-by: youkaichao <youkaichao@gmail.com>
Signed-off-by: cennn <2523403608@qq.com>
Co-authored-by: cennn <2523403608@qq.com>
This commit is contained in:
youkaichao
2025-02-20 12:41:17 +08:00
committed by GitHub
parent 0d243f2a54
commit ba81163997
13 changed files with 160 additions and 9 deletions

View File

@@ -625,6 +625,24 @@ if envs.VLLM_SERVER_DEV_MODE:
await engine_client(raw_request).reset_prefix_cache()
return Response(status_code=200)
@router.post("/sleep")
async def sleep(raw_request: Request):
# get POST params
level = raw_request.query_params.get("level", "1")
logger.info("sleep the engine with level %s", level)
await engine_client(raw_request).sleep(int(level))
# FIXME: in v0 with frontend multiprocessing, the sleep command
# is sent but does not finish yet when we return a response.
return Response(status_code=200)
@router.post("/wake_up")
async def wake_up(raw_request: Request):
logger.info("wake up the engine")
await engine_client(raw_request).wake_up()
# FIXME: in v0 with frontend multiprocessing, the wake-up command
# is sent but does not finish yet when we return a response.
return Response(status_code=200)
@router.post("/invocations", dependencies=[Depends(validate_json_request)])
async def invocations(raw_request: Request):