[Realtime API] Adds minimal realtime API based on websockets (#33187)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Patrick von Platen
2026-01-30 11:41:29 +01:00
committed by GitHub
parent 1a7894dbdf
commit 10152d2194
21 changed files with 1316 additions and 48 deletions

View File

@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import inspect
 import math
 from collections.abc import Iterable, Mapping, Sequence
 from functools import cached_property, partial
@@ -20,7 +19,6 @@ from mistral_common.protocol.transcription.request import TranscriptionRequest
 from mistral_common.tokens.tokenizers.audio import (
 Audio,
 AudioEncoder,
-TranscriptionFormat,
 )
 from transformers import BatchFeature, TensorType, WhisperConfig
 from transformers.tokenization_utils_base import TextInput
@@ -163,19 +161,10 @@ class VoxtralProcessorAdapter:
 assert isinstance(audio, np.ndarray)
 assert audio.ndim == 1
-# pad if necessary
-# TODO(Patrick) - remove once mistral-common is bumped
-if (
-self._audio_processor.audio_config.transcription_format
-!= TranscriptionFormat.STREAMING
-):
-sig = inspect.signature(self._audio_processor.pad)
-if "is_online_streaming" in sig.parameters:
-audio = self._audio_processor.pad(
-audio, self.sampling_rate, is_online_streaming=False
-)
-else:
-audio = self._audio_processor.pad(audio, self.sampling_rate)
+if not self._audio_processor.audio_config.is_streaming:
+audio = self._audio_processor.pad(
+audio, self.sampling_rate, is_online_streaming=False
+)
 audio_tokens = [self.begin_audio_token_id] + [
 self.audio_token_id