[KVConnector][Feature] Support KV connector cache reset via /reset_prefix_cache (#27170)
Signed-off-by: tovam <tovam@pliops.com>
Signed-off-by: Tova Movshovitz <tovam@pliops.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -152,7 +152,9 @@ class SchedulerInterface(ABC):
|
||||
return self.has_unfinished_requests() or self.has_finished_requests()
|
||||
|
||||
@abstractmethod
|
||||
def reset_prefix_cache(self, reset_running_requests: bool = False) -> bool:
|
||||
def reset_prefix_cache(
|
||||
self, reset_running_requests: bool = False, reset_connector: bool = False
|
||||
) -> bool:
|
||||
"""Reset the prefix cache for KV cache.
|
||||
|
||||
This is particularly required when the model weights are live-updated.
|
||||
|
||||
@@ -1380,7 +1380,9 @@ class Scheduler(SchedulerInterface):
|
||||
def has_finished_requests(self) -> bool:
    """Return True when ``self.finished_req_ids`` holds at least one entry."""
    finished_count = len(self.finished_req_ids)
    return finished_count != 0
|
||||
|
||||
def reset_prefix_cache(self, reset_running_requests: bool = False) -> bool:
|
||||
def reset_prefix_cache(
|
||||
self, reset_running_requests: bool = False, reset_connector: bool = False
|
||||
) -> bool:
|
||||
"""Reset the KV prefix cache.
|
||||
|
||||
If reset_running_requests is True, all the running requests will be
|
||||
@@ -1418,8 +1420,26 @@ class Scheduler(SchedulerInterface):
|
||||
"the presence of running requests waiting for remote KV transfer, "
|
||||
"which is not supported yet."
|
||||
)
|
||||
|
||||
if reset_connector:
|
||||
reset_successful = self.reset_connector_cache() and reset_successful
|
||||
|
||||
return reset_successful
|
||||
|
||||
def reset_connector_cache(self) -> bool:
    """Ask the configured KV connector to reset its cache.

    Returns:
        False when ``self.connector`` is None (a warning is logged) or when
        the connector's ``reset_cache()`` returns exactly ``False``;
        True in every other case.
    """
    kv_connector = self.connector
    if kv_connector is None:
        logger.warning("reset_connector called but no KV connector is configured.")
        return False

    # Identity comparison on purpose: only a literal False signals failure,
    # any other return value (None included) is treated as success.
    outcome = kv_connector.reset_cache()
    if outcome is False:
        return False

    if self.log_stats:
        # Flag the reset on the connector prefix-cache stats object.
        connector_stats = self.connector_prefix_cache_stats
        assert connector_stats is not None
        connector_stats.reset = True

    return True
|
||||
|
||||
def make_stats(
|
||||
self,
|
||||
spec_decoding_stats: SpecDecodingStats | None = None,
|
||||
|
||||
Reference in New Issue
Block a user