[Voxtral models] Skip audio-preprocessing warm-up to avoid a confusing error message during warm-up (#33576)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Patrick von Platen
2026-02-03 16:22:34 +01:00
committed by GitHub
parent 5c4f2dd6ef
commit f0d5251715
3 changed files with 10 additions and 3 deletions

View File

@@ -138,6 +138,9 @@ class OpenAISpeechToText(OpenAIServing):
if not supports_transcription(self.model_cls): if not supports_transcription(self.model_cls):
return return
if getattr(self.model_cls, "skip_warmup_audio_preprocessing", False):
return
try: try:
warmup_start = time.perf_counter() warmup_start = time.perf_counter()
logger.info("Warming up audio preprocessing libraries...") logger.info("Warming up audio preprocessing libraries...")
@@ -150,9 +153,7 @@ class OpenAISpeechToText(OpenAIServing):
_ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate) _ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate)
# Warm up mel-spectrogram computation with model-specific parameters # Warm up mel-spectrogram computation with model-specific parameters
from vllm.transformers_utils.processor import ( from vllm.transformers_utils.processor import cached_processor_from_config
cached_processor_from_config,
)
processor = cached_processor_from_config(self.model_config) processor = cached_processor_from_config(self.model_config)
feature_extractor = None feature_extractor = None

View File

@@ -335,6 +335,9 @@ class VoxtralForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
): ):
supported_languages = ISO639_1_SUPPORTED_LANGS supported_languages = ISO639_1_SUPPORTED_LANGS
# transformers currently has limited support for the MistralCommon backend
# and cached_get_processor. Skip the warm-up until this is fixed.
skip_warmup_audio_preprocessing = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],

View File

@@ -218,6 +218,9 @@ class VoxtralRealtimeBuffer:
@support_torch_compile @support_torch_compile
class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime): class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime):
requires_raw_input_tokens = True requires_raw_input_tokens = True
# transformers currently has limited support for the MistralCommon backend
# and cached_get_processor. Skip the warm-up until this is fixed.
skip_warmup_audio_preprocessing = True
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__(vllm_config=vllm_config, prefix=prefix) super().__init__(vllm_config=vllm_config, prefix=prefix)