[Frontend] Add /v1/audio/transcriptions OpenAI API endpoint (#12909)

This commit is contained in:
Nicolò Lucchesi
2025-02-13 16:23:45 +01:00
committed by GitHub
parent 37dfa60037
commit d84cef76eb
20 changed files with 910 additions and 19 deletions

View File

@@ -54,17 +54,18 @@ _POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
 TaskOption = Literal["auto", "generate", "embedding", "embed", "classify",
-                     "score", "reward"]
+                     "score", "reward", "transcription"]
 _ResolvedTask = Literal["generate", "embed", "classify", "score", "reward",
-                        "draft"]
+                        "draft", "transcription"]
-RunnerType = Literal["generate", "pooling", "draft"]
+RunnerType = Literal["generate", "pooling", "draft", "transcription"]
_RUNNER_TASKS: Dict[RunnerType, List[_ResolvedTask]] = {
"generate": ["generate"],
"pooling": ["embed", "classify", "score", "reward"],
"draft": ["draft"],
+    "transcription": ["transcription"],
}
_TASK_RUNNER: Dict[_ResolvedTask, RunnerType] = {
@@ -484,6 +485,8 @@ class ModelConfig:
return "embed"
if ModelRegistry.is_cross_encoder_model(architectures):
return "score"
+        if ModelRegistry.is_transcription_model(architectures):
+            return "transcription"
suffix_to_preferred_task: List[Tuple[str, _ResolvedTask]] = [
# Other models follow this pattern
@@ -516,6 +519,8 @@ class ModelConfig:
runner_support: Dict[RunnerType, bool] = {
# NOTE: Listed from highest to lowest priority,
# in case the model supports multiple of them
+            "transcription":
+            ModelRegistry.is_transcription_model(architectures),
"generate": ModelRegistry.is_text_generation_model(architectures),
"pooling": ModelRegistry.is_pooling_model(architectures),
}