[Frontend] Track Responses API server_load (#32323)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2026-01-14 20:00:37 +08:00
committed by GitHub
parent ce0946249d
commit 00e6402d56
2 changed files with 8 additions and 0 deletions

View File

@@ -259,6 +259,10 @@ def engine_client(request: Request) -> EngineClient:
async def get_server_load_metrics(request: Request):
# This endpoint returns the current server load metrics.
# It tracks requests utilizing the GPU from the following routes:
# - /v1/responses
# - /v1/responses/{response_id}
# - /v1/responses/{response_id}/cancel
# - /v1/messages
# - /v1/chat/completions
# - /v1/completions
# - /v1/audio/transcriptions