[Frontend] Add /collective_rpc API endpoint (#23075)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
22quinn
2025-08-19 10:29:32 -07:00
committed by GitHub
parent 03d4235fd2
commit f7cf5b512e
4 changed files with 126 additions and 1 deletions

View File

@@ -1044,6 +1044,34 @@ if envs.VLLM_SERVER_DEV_MODE:
is_sleeping = await engine_client(raw_request).is_sleeping()
return JSONResponse(content={"is_sleeping": is_sleeping})
@router.post("/collective_rpc")
async def collective_rpc(raw_request: Request):
    """Forward a collective RPC call to the engine client.

    The request body must be a JSON object with these keys:

    * ``method`` (required): forwarded unchanged as ``method``.
    * ``args`` (optional JSON array, default ``[]``): forwarded as a tuple.
    * ``kwargs`` (optional JSON object, default ``{}``): forwarded unchanged.
      An explicit ``null`` is passed through as ``None``.
    * ``timeout`` (optional): forwarded unchanged.

    Returns:
        An empty 200 response when the RPC returns ``None``. Otherwise a
        JSON object ``{"results": [...]}`` in which each result that is
        ``None``, a dict or a list is passed through as-is and every other
        result is converted with ``str()``.

    Raises:
        HTTPException: 400 if the body is not valid JSON, is not a JSON
            object, lacks ``method``, or carries an ``args``/``kwargs``
            value of the wrong container type.
    """
    try:
        body = await raw_request.json()
    except json.JSONDecodeError as e:
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
                            detail=f"JSON decode error: {e}") from e

    # Valid JSON is not necessarily an object; without this guard a list or
    # scalar body would fail on `.get` and surface as a 500.
    if not isinstance(body, dict):
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
                            detail="Request body must be a JSON object")

    method = body.get("method")
    if method is None:
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
                            detail="Missing 'method' in request body")

    # For security reasons, only serialized string args/kwargs are passed.
    # User-defined `method` is responsible for deserialization if needed.
    args: list[str] = body.get("args", [])
    # `tuple()` accepts any iterable, so a JSON string would be split into
    # characters and an object reduced to its keys; reject those up front.
    if not isinstance(args, list):
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
                            detail="'args' must be a JSON array")

    kwargs: Optional[dict[str, str]] = body.get("kwargs", {})
    # An explicit null is still forwarded as None, as it was before.
    if kwargs is not None and not isinstance(kwargs, dict):
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
                            detail="'kwargs' must be a JSON object")

    timeout: Optional[float] = body.get("timeout")

    results = await engine_client(raw_request).collective_rpc(
        method=method, timeout=timeout, args=tuple(args), kwargs=kwargs)
    if results is None:
        return Response(status_code=200)

    # Pass None/dict/list results through unchanged; stringify anything else
    # before handing the payload to JSONResponse.
    response: list[Any] = [
        result
        if result is None or isinstance(result, (dict, list)) else str(result)
        for result in results
    ]
    return JSONResponse(content={"results": response})
@router.post("/scale_elastic_ep",
dependencies=[Depends(validate_json_request)],