[Frontend] add 'verbose_json' and 'timestamp' feature on Whisper Transcription/Translation (#24209)

Signed-off-by: sangbumlikeagod <oironese@naver.com>
Signed-off-by: sangbumlikeagod <98077576+sangbumlikeagod@users.noreply.github.com>
2025-12-02 02:19:17 +09:00
parent 5d43f7372e
commit 092bb73b8a
8 changed files with 224 additions and 23 deletions
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -837,6 +837,10 @@ class SupportsTranscription(Protocol):
    Transcription models can opt out of text generation by setting this to
    `True`.
    """
+    supports_segment_timestamp: ClassVar[bool] = False
+    """
+    Transcription models can enable the segment timestamp option by setting this to `True`.
+    """

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -791,6 +791,7 @@ class WhisperForConditionalGeneration(

    # Whisper only supports audio-conditioned generation.
    supports_transcription_only = True
+    supports_segment_timestamp = True
    supported_languages = ISO639_1_SUPPORTED_LANGS

    @classmethod