[Core] Support fully transparent sleep mode (#11743)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-01-22 14:39:32 +08:00
committed by GitHub
parent 4004f144f3
commit 68ad4e3a8d
14 changed files with 877 additions and 40 deletions

View File

@@ -193,6 +193,17 @@ class ExecutorBase(ABC):
def stop_profile(self) -> None:
    """Dispatch a ``stop_profile`` call through ``collective_rpc``."""
    rpc_method = "stop_profile"
    self.collective_rpc(rpc_method)
def sleep(self, level: int = 1):
    """Dispatch a ``sleep`` call through ``collective_rpc``.

    Args:
        level: Passed through unchanged as the ``level`` keyword of the
            remote ``sleep`` call. Its meaning is defined by the callee.

    Raises:
        ValueError: If ``cache_config.enable_prefix_caching`` is truthy;
            no RPC is issued in that case.
    """
    prefix_caching_enabled = self.cache_config.enable_prefix_caching
    if prefix_caching_enabled:
        # TODO: support sleep with prefix caching
        # by resetting the prefix cache state,
        # after https://github.com/vllm-project/vllm/pull/12284
        raise ValueError("Cannot sleep when prefix caching is enabled.")

    rpc_kwargs = {"level": level}
    self.collective_rpc("sleep", kwargs=rpc_kwargs)
def wake_up(self):
    """Dispatch a ``wake_up`` call through ``collective_rpc``."""
    rpc_method = "wake_up"
    self.collective_rpc(rpc_method)
def save_sharded_state(
self,
path: str,