[Frontend] Gemma3n audio transcriptions/translations endpoint (#23735)

Signed-off-by: NickLucche <nlucches@redhat.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2025-09-01 12:07:46 +02:00
parent 107284959a
commit d46934b229
9 changed files with 189 additions and 63 deletions
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -4,7 +4,7 @@
 import math
 from collections.abc import Iterable, Mapping, Sequence
 from contextlib import nullcontext
-from typing import Optional, TypedDict, Union, cast
+from typing import Literal, Optional, TypedDict, Union, cast

 import numpy as np
 import torch
@@ -783,8 +783,9 @@ class WhisperForConditionalGeneration(nn.Module, SupportsTranscription,
            model_config: ModelConfig,  # not needed here
            stt_config: SpeechToTextConfig,
            language: Optional[str],
-            task_type: str,
-            request_prompt: str) -> PromptType:
+            task_type: Literal["transcribe", "translate"],
+            request_prompt: str,
+            to_language: Optional[str]) -> PromptType:
        if language is None:
            raise ValueError(
                "Language must be specified when creating the Whisper prompt")