adapt voxtral (#31095)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Patrick von Platen
2025-12-23 14:31:55 +01:00
committed by GitHub
parent b10d47e0e0
commit 3faa8bee57
12 changed files with 739 additions and 98 deletions

View File

@@ -46,6 +46,7 @@ from .interfaces import (
has_noops,
is_attention_free,
is_hybrid,
requires_raw_input_tokens,
supports_cross_encoding,
supports_mamba_prefix_caching,
supports_multimodal,
@@ -422,6 +423,7 @@ _MULTIMODAL_MODELS = {
),
"UltravoxModel": ("ultravox", "UltravoxModel"),
"VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"), # noqa: E501
"VoxtralStreamingGeneration": ("voxtral_streaming", "VoxtralStreamingGeneration"), # noqa: E501
# [Encoder-decoder]
"WhisperForConditionalGeneration": ("whisper", "WhisperForConditionalGeneration"), # noqa: E501
}
@@ -539,6 +541,7 @@ class _ModelInfo:
supports_cross_encoding: bool
supports_multimodal: bool
supports_multimodal_raw_input_only: bool
requires_raw_input_tokens: bool
supports_multimodal_encoder_tp_data: bool
supports_pp: bool
has_inner_state: bool
@@ -562,6 +565,7 @@ class _ModelInfo:
supports_multimodal_raw_input_only=supports_multimodal_raw_input_only(
model
),
requires_raw_input_tokens=requires_raw_input_tokens(model),
supports_multimodal_encoder_tp_data=supports_multimodal_encoder_tp_data(
model
),