From 18e7cbbb158a86bdc76585e64ada795bf1c0d435 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 3 Feb 2026 23:57:56 +0800 Subject: [PATCH] [Bugfix] Fix startup hang for Granite Speech (#33699) Signed-off-by: DarkLight1337 --- vllm/multimodal/budget.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py index 3fbec3d39..1380ec1ba 100644 --- a/vllm/multimodal/budget.py +++ b/vllm/multimodal/budget.py @@ -54,17 +54,17 @@ class MultiModalBudget: self.max_model_len = model_config.max_model_len self.max_num_reqs = scheduler_config.max_num_seqs - cache = mm_registry.processor_only_cache_from_config(vllm_config) - processor = mm_registry.create_processor(model_config, cache=cache) - - self.cache = cache - self.mm_limits = mm_limits = processor.info.allowed_mm_limits - - active_modalities = { - modality for modality, limit in mm_limits.items() if limit > 0 - } - with set_default_torch_num_threads(): # Avoid hang during startup + cache = mm_registry.processor_only_cache_from_config(vllm_config) + processor = mm_registry.create_processor(model_config, cache=cache) + + self.cache = cache + self.mm_limits = mm_limits = processor.info.allowed_mm_limits + + active_modalities = { + modality for modality, limit in mm_limits.items() if limit > 0 + } + all_mm_max_toks_per_item = get_mm_max_toks_per_item( model_config, mm_registry,