[Frontend][Docs] Transcription API streaming (#13301)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2025-03-06 11:39:35 +01:00
committed by GitHub
parent 69ff99fdcd
commit fa82b93853
5 changed files with 297 additions and 26 deletions

View File

@@ -1285,6 +1285,21 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
usage: Optional[UsageInfo] = Field(default=None)
class TranscriptionResponseStreamChoice(OpenAIBaseModel):
    # A single entry of `TranscriptionStreamResponse.choices`.
    #
    # `delta` is required; the two reason fields are optional and fall back
    # to None when the caller does not provide them.
    delta: DeltaMessage
    finish_reason: Optional[str] = Field(default=None)
    # Accepts either an int or a str (or None).
    stop_reason: Union[int, str, None] = Field(default=None)
class TranscriptionStreamResponse(OpenAIBaseModel):
    # Payload model tagged with the fixed object type "transcription.chunk".
    #
    # `model` and `choices` must be supplied by the caller; every other
    # field has a default.

    # Generated per instance as "trsc-" followed by `random_uuid()`.
    id: str = Field(default_factory=lambda: f"trsc-{random_uuid()}")
    object: Literal["transcription.chunk"] = "transcription.chunk"
    # Whole-second timestamp taken from `time.time()` at instantiation.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: list[TranscriptionResponseStreamChoice]
    # Optional usage record; None unless explicitly provided.
    usage: Optional[UsageInfo] = None
class BatchRequestInput(OpenAIBaseModel):
"""
The per-line object of the batch input file.
@@ -1510,6 +1525,15 @@ class TranscriptionRequest(OpenAIBaseModel):
timestamps incurs additional latency.
"""
# Opt-in streaming flag; defaults to False. When truthy, the sampling
# params built for the request use RequestOutputKind.DELTA instead of
# RequestOutputKind.FINAL_ONLY.
stream: Optional[bool] = False
"""Custom field not present in the original OpenAI definition. When set,
it will enable output to be streamed in a similar fashion as the Chat
Completion endpoint.
"""
# Flattened stream option to simplify form data.
# Both flags default to False. `validate_stream_options` rejects a request
# that sets either of them to a truthy value without also setting `stream`.
stream_include_usage: Optional[bool] = False
stream_continuous_usage_stats: Optional[bool] = False
# Default sampling parameters for transcription requests.
_DEFAULT_SAMPLING_PARAMS: dict = {
"temperature": 0,
@@ -1530,7 +1554,21 @@ class TranscriptionRequest(OpenAIBaseModel):
"temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"])
return SamplingParams.from_optional(temperature=temperature,
max_tokens=max_tokens)
max_tokens=max_tokens,
output_kind=RequestOutputKind.DELTA
if self.stream \
else RequestOutputKind.FINAL_ONLY)
@model_validator(mode="before")
@classmethod
def validate_stream_options(cls, data):
    """Reject stream-only options on a request that is not streaming.

    Runs in "before" mode, i.e. on the raw input prior to field
    validation, so ``data`` is not yet a parsed model instance.

    Args:
        data: Raw input handed to the model. Only inspected when it is a
            mapping of field names to values.

    Returns:
        ``data``, unchanged.

    Raises:
        ValueError: If ``stream_include_usage`` or
            ``stream_continuous_usage_stats`` is truthy while ``stream``
            is missing or falsy.
    """
    from collections.abc import Mapping

    # A "before" validator may be handed any object, not only a mapping.
    # Pass such input through untouched so the regular validation reports
    # it, instead of failing here with AttributeError on `.get`.
    if not isinstance(data, Mapping):
        return data

    # NOTE(review): values are tested for truthiness exactly as received.
    # If the form layer delivers raw strings, "false" would count as set;
    # confirm against the caller.
    stream_opts = ("stream_include_usage", "stream_continuous_usage_stats")
    streaming = data.get("stream", False)
    if not streaming and any(data.get(opt, False) for opt in stream_opts):
        raise ValueError(
            "Stream options can only be defined when `stream=True`.")
    return data
# Transcription response objects