[Frontend] Add /v1/audio/transcriptions OpenAI API endpoint (#12909)

This commit is contained in:
Nicolò Lucchesi
2025-02-13 16:23:45 +01:00
committed by GitHub
parent 37dfa60037
commit d84cef76eb
20 changed files with 910 additions and 19 deletions

View File

@@ -54,17 +54,18 @@ _POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
 TaskOption = Literal["auto", "generate", "embedding", "embed", "classify",
-                     "score", "reward"]
+                     "score", "reward", "transcription"]
 _ResolvedTask = Literal["generate", "embed", "classify", "score", "reward",
-                        "draft"]
+                        "draft", "transcription"]
-RunnerType = Literal["generate", "pooling", "draft"]
+RunnerType = Literal["generate", "pooling", "draft", "transcription"]
_RUNNER_TASKS: Dict[RunnerType, List[_ResolvedTask]] = {
"generate": ["generate"],
"pooling": ["embed", "classify", "score", "reward"],
"draft": ["draft"],
+    "transcription": ["transcription"],
}
_TASK_RUNNER: Dict[_ResolvedTask, RunnerType] = {
@@ -484,6 +485,8 @@ class ModelConfig:
return "embed"
if ModelRegistry.is_cross_encoder_model(architectures):
return "score"
+        if ModelRegistry.is_transcription_model(architectures):
+            return "transcription"
suffix_to_preferred_task: List[Tuple[str, _ResolvedTask]] = [
# Other models follow this pattern
@@ -516,6 +519,8 @@ class ModelConfig:
runner_support: Dict[RunnerType, bool] = {
# NOTE: Listed from highest to lowest priority,
# in case the model supports multiple of them
+            "transcription":
+            ModelRegistry.is_transcription_model(architectures),
"generate": ModelRegistry.is_text_generation_model(architectures),
"pooling": ModelRegistry.is_pooling_model(architectures),
}