[Realtime API] Adds minimal realtime API based on websockets (#33187)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Patrick von Platen
2026-01-30 11:41:29 +01:00
committed by GitHub
parent 1a7894dbdf
commit 10152d2194
21 changed files with 1316 additions and 48 deletions

View File

@@ -196,6 +196,13 @@ def build_app(args: Namespace, supported_tasks: tuple["SupportedTask", ...]) ->
register_translations_api_router(app)
if "realtime" in supported_tasks:
from vllm.entrypoints.openai.realtime.api_router import (
attach_router as register_realtime_api_router,
)
register_realtime_api_router(app)
if any(task in POOLING_TASKS for task in supported_tasks):
from vllm.entrypoints.pooling import register_pooling_api_routers
@@ -319,6 +326,11 @@ async def init_app_state(
engine_client, state, args, request_logger, supported_tasks
)
if "realtime" in supported_tasks:
from vllm.entrypoints.openai.realtime.api_router import init_realtime_state
init_realtime_state(engine_client, state, args, request_logger, supported_tasks)
if any(task in POOLING_TASKS for task in supported_tasks):
from vllm.entrypoints.pooling import init_pooling_state