[RL] Add Pause and Resume Generation for Asynchronous RL Training (#28037)
Signed-off-by: SamitHuang <285365963@qq.com> Signed-off-by: Samit <285365963@qq.com> Signed-off-by: samithuang <285365963@qq.com> Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
@@ -394,6 +394,84 @@ async def get_server_load_metrics(request: Request):
|
||||
return JSONResponse(content={"server_load": request.app.state.server_load_metrics})
|
||||
|
||||
|
||||
@router.post("/pause")
|
||||
async def pause_generation(
|
||||
raw_request: Request,
|
||||
wait_for_inflight_requests: bool = Query(False),
|
||||
clear_cache: bool = Query(True),
|
||||
) -> JSONResponse:
|
||||
"""Pause generation requests to allow weight updates.
|
||||
|
||||
Args:
|
||||
wait_for_inflight_requests: When ``True`` waits for in-flight
|
||||
requests to finish before pausing. When ``False`` (default),
|
||||
aborts any in-flight requests immediately.
|
||||
clear_cache: Whether to clear KV/prefix caches after draining.
|
||||
"""
|
||||
|
||||
engine = engine_client(raw_request)
|
||||
|
||||
try:
|
||||
await engine.pause_generation(
|
||||
wait_for_inflight_requests=wait_for_inflight_requests,
|
||||
clear_cache=clear_cache,
|
||||
)
|
||||
return JSONResponse(
|
||||
content={"status": "paused"},
|
||||
status_code=HTTPStatus.OK.value,
|
||||
)
|
||||
|
||||
except ValueError as err:
|
||||
return JSONResponse(
|
||||
content={"error": str(err)},
|
||||
status_code=HTTPStatus.BAD_REQUEST.value,
|
||||
)
|
||||
except Exception as err: # pragma: no cover - defensive
|
||||
logger.exception("Failed to pause generation")
|
||||
return JSONResponse(
|
||||
content={"error": f"Failed to pause generation: {err}"},
|
||||
status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/resume")
|
||||
async def resume_generation(raw_request: Request) -> JSONResponse:
|
||||
"""Resume generation after a pause."""
|
||||
|
||||
engine = engine_client(raw_request)
|
||||
|
||||
try:
|
||||
await engine.resume_generation()
|
||||
return JSONResponse(
|
||||
content={"status": "resumed"},
|
||||
status_code=HTTPStatus.OK.value,
|
||||
)
|
||||
except Exception as err: # pragma: no cover - defensive
|
||||
logger.exception("Failed to resume generation")
|
||||
return JSONResponse(
|
||||
content={"error": f"Failed to resume generation: {err}"},
|
||||
status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/is_paused")
|
||||
async def is_paused(raw_request: Request) -> JSONResponse:
|
||||
"""Return the current pause status."""
|
||||
|
||||
engine = engine_client(raw_request)
|
||||
|
||||
try:
|
||||
paused = await engine.is_paused()
|
||||
except Exception as err: # pragma: no cover - defensive
|
||||
logger.exception("Failed to fetch pause status")
|
||||
return JSONResponse(
|
||||
content={"error": f"Failed to fetch pause status: {err}"},
|
||||
status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
|
||||
)
|
||||
|
||||
return JSONResponse(content={"is_paused": paused})
|
||||
|
||||
|
||||
@router.post(
|
||||
"/tokenize",
|
||||
dependencies=[Depends(validate_json_request)],
|
||||
|
||||
Reference in New Issue
Block a user