[Core] Support fully transparent sleep mode (#11743)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -197,6 +197,7 @@ class EngineArgs:
|
||||
kv_transfer_config: Optional[KVTransferConfig] = None
|
||||
|
||||
generation_config: Optional[str] = None
|
||||
enable_sleep_mode: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if not self.tokenizer:
|
||||
@@ -955,6 +956,12 @@ class EngineArgs:
|
||||
"loaded from model. If set to a folder path, the generation config "
|
||||
"will be loaded from the specified folder path.")
|
||||
|
||||
parser.add_argument("--enable-sleep-mode",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable sleep mode for the engine. "
|
||||
"(only cuda platform is supported)")
|
||||
|
||||
return parser
|
||||
|
||||
@classmethod
|
||||
@@ -999,7 +1006,9 @@ class EngineArgs:
|
||||
override_neuron_config=self.override_neuron_config,
|
||||
override_pooler_config=self.override_pooler_config,
|
||||
logits_processor_pattern=self.logits_processor_pattern,
|
||||
generation_config=self.generation_config)
|
||||
generation_config=self.generation_config,
|
||||
enable_sleep_mode=self.enable_sleep_mode,
|
||||
)
|
||||
|
||||
def create_load_config(self) -> LoadConfig:
|
||||
return LoadConfig(
|
||||
|
||||
@@ -1818,6 +1818,16 @@ class LLMEngine:
|
||||
def stop_profile(self) -> None:
    """Delegate to ``self.model_executor.stop_profile()``.

    Returns:
        None. Whatever the executor call returns is discarded.
    """
    self.model_executor.stop_profile()
|
||||
|
||||
def sleep(self, level: int = 1) -> None:
    """Forward a sleep request to the model executor.

    Args:
        level: Passed through unchanged as the ``level`` keyword of
            ``self.model_executor.sleep``; its meaning is defined by the
            executor, not by this method.

    Raises:
        AssertionError: If ``enable_sleep_mode`` on
            ``self.vllm_config.model_config`` is falsy.
    """
    # Raise explicitly instead of using ``assert`` so the guard still runs
    # under ``python -O``; the exception type and message stay the same.
    if not self.vllm_config.model_config.enable_sleep_mode:
        raise AssertionError("Sleep mode is not enabled in the model config")
    self.model_executor.sleep(level=level)
|
||||
|
||||
def wake_up(self) -> None:
    """Forward a wake-up request to the model executor.

    Raises:
        AssertionError: If ``enable_sleep_mode`` on
            ``self.vllm_config.model_config`` is falsy.
    """
    # Raise explicitly instead of using ``assert`` so the guard still runs
    # under ``python -O``; the exception type and message stay the same.
    if not self.vllm_config.model_config.enable_sleep_mode:
        raise AssertionError("Sleep mode is not enabled in the model config")
    self.model_executor.wake_up()
|
||||
|
||||
def check_health(self) -> None:
|
||||
if self.tokenizer:
|
||||
self.tokenizer.check_health()
|
||||
|
||||
Reference in New Issue
Block a user