[Core] Expose API endpoint /is_sleeping (#14312)
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
This commit is contained in:
@@ -407,6 +407,9 @@ class AsyncLLM(EngineClient):
|
||||
async def wake_up(self) -> None:
    """Await ``wake_up_async()`` on ``self.engine_core``; nothing is returned."""
    core = self.engine_core
    await core.wake_up_async()
|
||||
|
||||
async def is_sleeping(self) -> bool:
    """Return the awaited result of ``self.engine_core.is_sleeping_async()``."""
    sleeping = await self.engine_core.is_sleeping_async()
    return sleeping
|
||||
|
||||
async def add_lora(self, lora_request: LoRARequest) -> bool:
    """Load a new LoRA adapter into the engine for future requests.

    Forwards ``lora_request`` to ``self.engine_core.add_lora_async`` and
    returns whatever that awaited call yields.
    """
    outcome = await self.engine_core.add_lora_async(lora_request)
    return outcome
|
||||
|
||||
@@ -253,6 +253,9 @@ class EngineCore:
|
||||
def wake_up(self):
    """Call ``wake_up()`` on ``self.model_executor``; nothing is returned."""
    executor = self.model_executor
    executor.wake_up()
|
||||
|
||||
def is_sleeping(self) -> bool:
    """Return the ``is_sleeping`` attribute of ``self.model_executor``."""
    executor = self.model_executor
    return executor.is_sleeping
|
||||
|
||||
def execute_dummy_batch(self):
    """Invoke ``collective_rpc("execute_dummy_batch")`` on the model executor.

    The RPC result is discarded; this method returns ``None``.
    """
    executor = self.model_executor
    executor.collective_rpc("execute_dummy_batch")
|
||||
|
||||
|
||||
@@ -89,6 +89,9 @@ class EngineCoreClient(ABC):
|
||||
def wake_up(self) -> None:
    """Unimplemented here: always raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
def is_sleeping(self) -> bool:
    """Unimplemented here: always raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
def execute_dummy_batch(self) -> None:
    """Unimplemented here: always raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
@@ -128,6 +131,9 @@ class EngineCoreClient(ABC):
|
||||
async def wake_up_async(self) -> None:
    """Unimplemented here: awaiting this raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
async def is_sleeping_async(self) -> bool:
    """Unimplemented here: awaiting this raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
async def abort_requests_async(self, request_ids: list[str]) -> None:
    """Unimplemented here: awaiting this raises ``NotImplementedError``.

    ``request_ids`` is accepted but not used by this stub.
    """
    raise NotImplementedError
|
||||
|
||||
@@ -182,6 +188,9 @@ class InprocClient(EngineCoreClient):
|
||||
def wake_up(self) -> None:
    """Call ``wake_up()`` directly on ``self.engine_core``; returns nothing."""
    core = self.engine_core
    core.wake_up()
|
||||
|
||||
def is_sleeping(self) -> bool:
    """Return the result of calling ``self.engine_core.is_sleeping()``."""
    sleeping = self.engine_core.is_sleeping()
    return sleeping
|
||||
|
||||
def execute_dummy_batch(self) -> None:
    """Call ``execute_dummy_batch()`` on ``self.engine_core``; returns nothing."""
    core = self.engine_core
    core.execute_dummy_batch()
|
||||
|
||||
@@ -433,6 +442,9 @@ class SyncMPClient(MPClient):
|
||||
def wake_up(self) -> None:
    """Issue the ``"wake_up"`` utility call via ``self._call_utility``.

    Whatever the utility call returns is discarded.
    """
    method_name = "wake_up"
    self._call_utility(method_name)
|
||||
|
||||
def is_sleeping(self) -> bool:
    """Return the result of the ``"is_sleeping"`` utility call."""
    reply = self._call_utility("is_sleeping")
    return reply
|
||||
|
||||
def execute_dummy_batch(self) -> None:
    """Issue the ``"execute_dummy_batch"`` utility call via ``self._call_utility``.

    Whatever the utility call returns is discarded.
    """
    method_name = "execute_dummy_batch"
    self._call_utility(method_name)
|
||||
|
||||
@@ -523,6 +535,9 @@ class AsyncMPClient(MPClient):
|
||||
async def wake_up_async(self) -> None:
    """Await the ``"wake_up"`` utility call; its result is discarded."""
    method_name = "wake_up"
    await self._call_utility_async(method_name)
|
||||
|
||||
async def is_sleeping_async(self) -> bool:
    """Return the awaited result of the ``"is_sleeping"`` utility call."""
    reply = await self._call_utility_async("is_sleeping")
    return reply
|
||||
|
||||
async def execute_dummy_batch_async(self) -> None:
    """Await the ``"execute_dummy_batch"`` utility call; its result is discarded."""
    method_name = "execute_dummy_batch"
    await self._call_utility_async(method_name)
|
||||
|
||||
|
||||
@@ -235,6 +235,9 @@ class LLMEngine:
|
||||
def wake_up(self):
    """Call ``wake_up()`` on ``self.engine_core``; nothing is returned."""
    core = self.engine_core
    core.wake_up()
|
||||
|
||||
def is_sleeping(self) -> bool:
    """Return the result of calling ``self.engine_core.is_sleeping()``."""
    sleeping = self.engine_core.is_sleeping()
    return sleeping
|
||||
|
||||
def get_tokenizer_group(
|
||||
self,
|
||||
group_type: type[_G] = BaseTokenizerGroup,
|
||||
|
||||
Reference in New Issue
Block a user