[Feature] Add basic metrics for /realtime endpoint (#35500)

Signed-off-by: Thomas Pouget-Abadie <thomaspou@microsoft.com>
Signed-off-by: pougetat <thomas.pougetabadie@gmail.com>
Co-authored-by: Thomas Pouget-Abadie <thomaspou@microsoft.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
pougetat
2026-03-04 03:56:32 -08:00
committed by GitHub
parent d6e04f4c43
commit 1659b2e058
2 changed files with 86 additions and 0 deletions

View File

@@ -264,6 +264,14 @@ def build_app(
# Add scaling middleware to check for scaling state
app.add_middleware(ScalingMiddleware)
if "realtime" in supported_tasks:
# Add WebSocket metrics middleware
from vllm.entrypoints.openai.realtime.metrics import (
WebSocketMetricsMiddleware,
)
app.add_middleware(WebSocketMetricsMiddleware)
if envs.VLLM_DEBUG_LOG_API_SERVER_RESPONSE:
logger.warning(
"CAUTION: Enabling log response in the API Server. "