From 18e7cbbb158a86bdc76585e64ada795bf1c0d435 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 3 Feb 2026 23:57:56 +0800
Subject: [PATCH] [Bugfix] Fix startup hang for Granite Speech (#33699)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/multimodal/budget.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py
index 3fbec3d39..1380ec1ba 100644
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -54,17 +54,17 @@ class MultiModalBudget:
         self.max_model_len = model_config.max_model_len
         self.max_num_reqs = scheduler_config.max_num_seqs
 
-        cache = mm_registry.processor_only_cache_from_config(vllm_config)
-        processor = mm_registry.create_processor(model_config, cache=cache)
-
-        self.cache = cache
-        self.mm_limits = mm_limits = processor.info.allowed_mm_limits
-
-        active_modalities = {
-            modality for modality, limit in mm_limits.items() if limit > 0
-        }
-
         with set_default_torch_num_threads():  # Avoid hang during startup
+            cache = mm_registry.processor_only_cache_from_config(vllm_config)
+            processor = mm_registry.create_processor(model_config, cache=cache)
+
+            self.cache = cache
+            self.mm_limits = mm_limits = processor.info.allowed_mm_limits
+
+            active_modalities = {
+                modality for modality, limit in mm_limits.items() if limit > 0
+            }
+
             all_mm_max_toks_per_item = get_mm_max_toks_per_item(
                 model_config,
                 mm_registry,