[Realtime API] Adds minimal realtime API based on websockets (#33187)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Patrick von Platen
2026-01-30 11:41:29 +01:00
committed by GitHub
parent 1a7894dbdf
commit 10152d2194
21 changed files with 1316 additions and 48 deletions

View File

@@ -19,11 +19,12 @@ import pytest
import pytest_asyncio
from vllm import SamplingParams
from vllm.inputs.data import StreamingInput
from vllm.outputs import RequestOutput
from vllm.platforms import current_platform
from vllm.sampling_params import RequestOutputKind
from vllm.utils.torch_utils import set_default_torch_num_threads
from vllm.v1.engine.async_llm import AsyncLLM, StreamingInput
from vllm.v1.engine.async_llm import AsyncLLM
if not current_platform.is_cuda():
pytest.skip(reason="V1 currently only supported on CUDA.", allow_module_level=True)