[Realtime API] Adds minimal realtime API based on websockets (#33187)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Patrick von Platen
2026-01-30 11:41:29 +01:00
committed by GitHub
parent 1a7894dbdf
commit 10152d2194
21 changed files with 1316 additions and 48 deletions

View File

@@ -6,7 +6,7 @@ from collections.abc import AsyncGenerator, Iterable, Mapping
from typing import Any
from vllm.config import ModelConfig, VllmConfig
-from vllm.inputs.data import PromptType
+from vllm.inputs.data import PromptType, StreamingInput
from vllm.lora.request import LoRARequest
from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import IOProcessor
@@ -49,7 +49,7 @@ class EngineClient(ABC):
@abstractmethod
def generate(
self,
-prompt: EngineCoreRequest | PromptType,
+prompt: EngineCoreRequest | PromptType | AsyncGenerator[StreamingInput, None],
sampling_params: SamplingParams,
request_id: str,
*,