[Core] Support fully transparent sleep mode (#11743)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-01-22 14:39:32 +08:00
committed by GitHub
parent 4004f144f3
commit 68ad4e3a8d
14 changed files with 877 additions and 40 deletions

View File

@@ -197,6 +197,7 @@ class EngineArgs:
kv_transfer_config: Optional[KVTransferConfig] = None
generation_config: Optional[str] = None
enable_sleep_mode: bool = False
def __post_init__(self):
if not self.tokenizer:
@@ -955,6 +956,12 @@ class EngineArgs:
"loaded from model. If set to a folder path, the generation config "
"will be loaded from the specified folder path.")
parser.add_argument("--enable-sleep-mode",
action="store_true",
default=False,
help="Enable sleep mode for the engine. "
"(only cuda platform is supported)")
return parser
@classmethod
@@ -999,7 +1006,9 @@ class EngineArgs:
override_neuron_config=self.override_neuron_config,
override_pooler_config=self.override_pooler_config,
logits_processor_pattern=self.logits_processor_pattern,
generation_config=self.generation_config)
generation_config=self.generation_config,
enable_sleep_mode=self.enable_sleep_mode,
)
def create_load_config(self) -> LoadConfig:
return LoadConfig(

View File

@@ -1818,6 +1818,16 @@ class LLMEngine:
def stop_profile(self) -> None:
    """Delegate to the model executor's ``stop_profile``."""
    executor = self.model_executor
    executor.stop_profile()
def sleep(self, level: int = 1) -> None:
    """Forward a sleep request to the model executor.

    Args:
        level: Passed through unchanged as
            ``model_executor.sleep(level=level)``; its meaning is defined
            by the executor, not by this method.

    Raises:
        AssertionError: If ``enable_sleep_mode`` is false on the model
            config.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # compiled away under ``python -O``, which would silently drop this guard.
    # AssertionError is kept so existing callers see the same exception type.
    if not self.vllm_config.model_config.enable_sleep_mode:
        raise AssertionError("Sleep mode is not enabled in the model config")
    self.model_executor.sleep(level=level)
def wake_up(self) -> None:
    """Forward a wake-up request to the model executor.

    Raises:
        AssertionError: If ``enable_sleep_mode`` is false on the model
            config.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # compiled away under ``python -O``, which would silently drop this guard.
    # AssertionError is kept so existing callers see the same exception type.
    if not self.vllm_config.model_config.enable_sleep_mode:
        raise AssertionError("Sleep mode is not enabled in the model config")
    self.model_executor.wake_up()
def check_health(self) -> None:
if self.tokenizer:
self.tokenizer.check_health()