[Bugfix] Avoid repeatedly creating dummy data during engine startup (#17935)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-05-13 13:40:19 +08:00
committed by GitHub
parent 1df491c522
commit 61e0a506a3
15 changed files with 99 additions and 4 deletions

View File

@@ -409,6 +409,9 @@ class LLMEngine:
# the next step without re-scheduling.
self._skip_scheduling_next_step = False
# Don't keep the dummy data in memory
self.reset_mm_cache()
def _initialize_kv_caches(self) -> None:
"""Initialize the KV cache in the worker(s).
@@ -913,6 +916,10 @@ class LLMEngine:
"""
return self.scheduler[virtual_engine].has_unfinished_seqs()
def reset_mm_cache(self) -> bool:
    """Reset the multi-modal processor cache.

    Delegates to ``reset_processor_cache()`` on the multi-modal registry
    reached through ``self.input_preprocessor.mm_registry`` and passes the
    result back to the caller unchanged.

    Returns:
        Whatever the registry's ``reset_processor_cache()`` returns. It is
        annotated as ``bool`` here; presumably a success flag -- confirm
        against the registry implementation.
    """
    mm_registry = self.input_preprocessor.mm_registry
    return mm_registry.reset_processor_cache()
def reset_prefix_cache(self, device: Optional[Device] = None) -> bool:
"""Reset prefix cache for all devices."""