[Voxtral models] Skip warm-up to skip confusing error message in warm-up (#33576)
Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
committed by
GitHub
parent
5c4f2dd6ef
commit
f0d5251715
@@ -138,6 +138,9 @@ class OpenAISpeechToText(OpenAIServing):
|
|||||||
if not supports_transcription(self.model_cls):
|
if not supports_transcription(self.model_cls):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if getattr(self.model_cls, "skip_warmup_audio_preprocessing", False):
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
warmup_start = time.perf_counter()
|
warmup_start = time.perf_counter()
|
||||||
logger.info("Warming up audio preprocessing libraries...")
|
logger.info("Warming up audio preprocessing libraries...")
|
||||||
@@ -150,9 +153,7 @@ class OpenAISpeechToText(OpenAIServing):
|
|||||||
_ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate)
|
_ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate)
|
||||||
|
|
||||||
# Warm up mel-spectrogram computation with model-specific parameters
|
# Warm up mel-spectrogram computation with model-specific parameters
|
||||||
from vllm.transformers_utils.processor import (
|
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||||
cached_processor_from_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
processor = cached_processor_from_config(self.model_config)
|
processor = cached_processor_from_config(self.model_config)
|
||||||
feature_extractor = None
|
feature_extractor = None
|
||||||
|
|||||||
@@ -335,6 +335,9 @@ class VoxtralForConditionalGeneration(
|
|||||||
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
|
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
|
||||||
):
|
):
|
||||||
supported_languages = ISO639_1_SUPPORTED_LANGS
|
supported_languages = ISO639_1_SUPPORTED_LANGS
|
||||||
|
# transformers currently has limited support for the MistralCommon backend
|
||||||
|
# and cached_get_processor. Let's skip until fixed
|
||||||
|
skip_warmup_audio_preprocessing = True
|
||||||
|
|
||||||
packed_modules_mapping = {
|
packed_modules_mapping = {
|
||||||
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
||||||
|
|||||||
@@ -218,6 +218,9 @@ class VoxtralRealtimeBuffer:
|
|||||||
@support_torch_compile
|
@support_torch_compile
|
||||||
class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime):
|
class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime):
|
||||||
requires_raw_input_tokens = True
|
requires_raw_input_tokens = True
|
||||||
|
# transformers currently has limited support for the MistralCommon backend
|
||||||
|
# and cached_get_processor. Let's skip until fixed
|
||||||
|
skip_warmup_audio_preprocessing = True
|
||||||
|
|
||||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||||
|
|||||||
Reference in New Issue
Block a user