diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py
index 0cd2419ca..3fbec3d39 100644
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -72,9 +72,14 @@ class MultiModalBudget:
                 mm_counts=dict.fromkeys(active_modalities, 1),
             )
 
+        # Some models (e.g., Qwen3Omni with use_audio_in_video=True) share
+        # placeholders between modalities, so an active modality may have no
+        # entry of its own in `all_mm_max_toks_per_item`. Keep only the
+        # modalities that do, i.e. those with independent placeholder tokens.
         mm_max_toks_per_item = {
             modality: all_mm_max_toks_per_item[modality]
             for modality in active_modalities
+            if modality in all_mm_max_toks_per_item
         }
 
         encoder_compute_budget, encoder_cache_size = compute_mm_encoder_budget(