[Frontend] Multithreaded async multimodal load_bytes (#22710)

Signed-off-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com>
Co-authored-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com>
This commit is contained in:
milesial
2025-08-13 06:09:26 -07:00
committed by GitHub
parent b159c0a67a
commit 20d65aa755
2 changed files with 27 additions and 6 deletions

View File

@@ -63,6 +63,7 @@ if TYPE_CHECKING:
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
VLLM_VIDEO_FETCH_TIMEOUT: int = 30
VLLM_AUDIO_FETCH_TIMEOUT: int = 10
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
VLLM_MM_INPUT_CACHE_GIB: int = 4
@@ -555,6 +556,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_AUDIO_FETCH_TIMEOUT":
lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
# Max number of workers for the thread pool handling
# media bytes loading. Set to 1 to disable parallel processing.
# Default is 8
"VLLM_MEDIA_LOADING_THREAD_COUNT":
lambda: int(os.getenv("VLLM_MEDIA_LOADING_THREAD_COUNT", "8")),
# Maximum filesize in MB for a single audio file when processing
# speech-to-text requests. Files larger than this will be rejected.
# Default is 25 MB