[Bugfix] Avoid repeatedly creating dummy data during engine startup (#17935)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-05-13 13:40:19 +08:00
parent 1df491c522
commit 61e0a506a3
15 changed files with 99 additions and 4 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -409,6 +409,9 @@ class LLMEngine:
        # the next step without re-scheduling.
        self._skip_scheduling_next_step = False

+        # Don't keep the dummy data in memory
+        self.reset_mm_cache()
+
    def _initialize_kv_caches(self) -> None:
        """Initialize the KV cache in the worker(s).

@@ -913,6 +916,10 @@ class LLMEngine:
        """
        return self.scheduler[virtual_engine].has_unfinished_seqs()

+    def reset_mm_cache(self) -> bool:
+        """Reset the multi-modal processor cache via ``mm_registry``."""
+        return self.input_preprocessor.mm_registry.reset_processor_cache()
+
    def reset_prefix_cache(self, device: Optional[Device] = None) -> bool:
        """Reset prefix cache for all devices."""