diff --git a/requirements/common.txt b/requirements/common.txt index a569df882..26d53f80a 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -42,7 +42,6 @@ depyf==0.20.0 # required for profiling and debugging with compilation config cloudpickle # allows pickling lambda functions in model_executor/models/registry.py watchfiles # required for http server to monitor the updates of TLS files python-json-logger # Used by logging as per examples/others/logging_configuration.md -scipy # Required for phi-4-multimodal-instruct ninja # Required for xgrammar, rocm, tpu, xpu pybase64 # fast base64 implementation cbor2 # Required for cross-language serialization of hashable objects diff --git a/setup.py b/setup.py index ea0dabfb0..8c952e01a 100644 --- a/setup.py +++ b/setup.py @@ -978,12 +978,13 @@ setup( ext_modules=ext_modules, install_requires=get_requirements(), extras_require={ - "bench": ["pandas", "matplotlib", "seaborn", "datasets"], + "bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], "runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"], "audio": [ "librosa", + "scipy", "soundfile", "mistral_common[audio]", ], # Required for audio processing diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py index 6e339d2ef..813725d6d 100644 --- a/vllm/multimodal/audio.py +++ b/vllm/multimodal/audio.py @@ -27,6 +27,12 @@ try: except ImportError: soundfile = PlaceholderModule("soundfile") # type: ignore[assignment] + +try: + import scipy.signal as scipy_signal +except ImportError: + scipy_signal = PlaceholderModule("scipy").placeholder_attr("signal") # type: ignore[assignment] + # ============================================================ @@ -173,13 +179,10 @@ def resample_audio_scipy( orig_sr: float, target_sr: float, ): - # lazy import scipy.signal, otherwise it will crash doc build. - import scipy.signal - if orig_sr > target_sr: - return scipy.signal.resample_poly(audio, 1, orig_sr // target_sr) + return scipy_signal.resample_poly(audio, 1, orig_sr // target_sr) elif orig_sr < target_sr: - return scipy.signal.resample_poly(audio, target_sr // orig_sr, 1) + return scipy_signal.resample_poly(audio, target_sr // orig_sr, 1) return audio