[Bugfix] Avoid repeatedly creating dummy data during engine startup (#17935)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -409,6 +409,9 @@ class LLMEngine:
         # the next step without re-scheduling.
         self._skip_scheduling_next_step = False
 
+        # Don't keep the dummy data in memory
+        self.reset_mm_cache()
+
     def _initialize_kv_caches(self) -> None:
         """Initialize the KV cache in the worker(s).
 
@@ -913,6 +916,10 @@ class LLMEngine:
         """
         return self.scheduler[virtual_engine].has_unfinished_seqs()
 
+    def reset_mm_cache(self) -> bool:
+        """Reset the multi-modal cache."""
+        return self.input_preprocessor.mm_registry.reset_processor_cache()
+
     def reset_prefix_cache(self, device: Optional[Device] = None) -> bool:
         """Reset prefix cache for all devices."""
 
Reference in New Issue
Block a user