[Core] Expose API endpoint /is_sleeping (#14312)

Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
This commit is contained in:
Jun Duan
2025-03-15 09:28:14 -04:00
committed by GitHub
parent f58aea002c
commit 74bc397b0a
12 changed files with 100 additions and 4 deletions

View File

@@ -407,6 +407,9 @@ class AsyncLLM(EngineClient):
async def wake_up(self) -> None:
    """Wake the engine up by delegating to the engine core client.

    Awaits ``self.engine_core.wake_up_async()`` and returns nothing.
    """
    await self.engine_core.wake_up_async()
async def is_sleeping(self) -> bool:
    """Report whether the engine is sleeping.

    Returns:
        The value obtained by awaiting
        ``self.engine_core.is_sleeping_async()``.
    """
    return await self.engine_core.is_sleeping_async()
async def add_lora(self, lora_request: LoRARequest) -> bool:
    """Load a new LoRA adapter into the engine for future requests.

    Args:
        lora_request: Forwarded unchanged to the engine core client.

    Returns:
        The value obtained by awaiting
        ``self.engine_core.add_lora_async(lora_request)``.
    """
    return await self.engine_core.add_lora_async(lora_request)

View File

@@ -253,6 +253,9 @@ class EngineCore:
def wake_up(self) -> None:
    """Wake the model executor up via ``self.model_executor.wake_up()``."""
    self.model_executor.wake_up()
def is_sleeping(self) -> bool:
    """Report whether the model executor is sleeping.

    Returns:
        The current value of ``self.model_executor.is_sleeping``; note it
        is read as an attribute here, not called.
    """
    return self.model_executor.is_sleeping
def execute_dummy_batch(self) -> None:
    """Ask the model executor to run ``"execute_dummy_batch"``.

    The request is issued through ``self.model_executor.collective_rpc``;
    whatever that call returns is discarded.
    """
    self.model_executor.collective_rpc("execute_dummy_batch")

View File

@@ -89,6 +89,9 @@ class EngineCoreClient(ABC):
def wake_up(self) -> None:
    """Base-class placeholder for the synchronous wake-up call.

    Raises:
        NotImplementedError: Always; this version provides no behavior.
    """
    raise NotImplementedError
def is_sleeping(self) -> bool:
    """Base-class placeholder for the synchronous sleep-state query.

    Raises:
        NotImplementedError: Always; this version provides no behavior.
    """
    raise NotImplementedError
def execute_dummy_batch(self) -> None:
    """Base-class placeholder for the synchronous dummy-batch call.

    Raises:
        NotImplementedError: Always; this version provides no behavior.
    """
    raise NotImplementedError
@@ -128,6 +131,9 @@ class EngineCoreClient(ABC):
async def wake_up_async(self) -> None:
    """Base-class placeholder for the asynchronous wake-up call.

    Raises:
        NotImplementedError: Always, once the coroutine is awaited.
    """
    raise NotImplementedError
async def is_sleeping_async(self) -> bool:
    """Base-class placeholder for the asynchronous sleep-state query.

    Raises:
        NotImplementedError: Always, once the coroutine is awaited.
    """
    raise NotImplementedError
async def abort_requests_async(self, request_ids: list[str]) -> None:
    """Base-class placeholder for asynchronously aborting requests.

    Args:
        request_ids: Identifiers of the requests to abort; unused here.

    Raises:
        NotImplementedError: Always, once the coroutine is awaited.
    """
    raise NotImplementedError
@@ -182,6 +188,9 @@ class InprocClient(EngineCoreClient):
def wake_up(self) -> None:
    """Wake the engine core up via a direct ``self.engine_core.wake_up()`` call."""
    self.engine_core.wake_up()
def is_sleeping(self) -> bool:
    """Report whether the engine core is sleeping.

    Returns:
        Whatever ``self.engine_core.is_sleeping()`` returns.
    """
    return self.engine_core.is_sleeping()
def execute_dummy_batch(self) -> None:
    """Run a dummy batch via ``self.engine_core.execute_dummy_batch()``."""
    self.engine_core.execute_dummy_batch()
@@ -433,6 +442,9 @@ class SyncMPClient(MPClient):
def wake_up(self) -> None:
    """Invoke the ``"wake_up"`` utility through ``self._call_utility``.

    Any value returned by the utility call is discarded.
    """
    self._call_utility("wake_up")
def is_sleeping(self) -> bool:
    """Query the sleep state through the ``"is_sleeping"`` utility.

    Returns:
        Whatever ``self._call_utility("is_sleeping")`` returns.
    """
    return self._call_utility("is_sleeping")
def execute_dummy_batch(self) -> None:
    """Invoke the ``"execute_dummy_batch"`` utility through ``self._call_utility``.

    Any value returned by the utility call is discarded.
    """
    self._call_utility("execute_dummy_batch")
@@ -523,6 +535,9 @@ class AsyncMPClient(MPClient):
async def wake_up_async(self) -> None:
    """Invoke the ``"wake_up"`` utility through ``self._call_utility_async``.

    The call is awaited and its result is discarded.
    """
    await self._call_utility_async("wake_up")
async def is_sleeping_async(self) -> bool:
    """Query the sleep state through the ``"is_sleeping"`` utility.

    Returns:
        The value obtained by awaiting
        ``self._call_utility_async("is_sleeping")``.
    """
    return await self._call_utility_async("is_sleeping")
async def execute_dummy_batch_async(self) -> None:
    """Invoke the ``"execute_dummy_batch"`` utility through ``self._call_utility_async``.

    The call is awaited and its result is discarded.
    """
    await self._call_utility_async("execute_dummy_batch")

View File

@@ -235,6 +235,9 @@ class LLMEngine:
def wake_up(self) -> None:
    """Wake the engine up by delegating to ``self.engine_core.wake_up()``."""
    self.engine_core.wake_up()
def is_sleeping(self) -> bool:
    """Report whether the engine is sleeping.

    Returns:
        Whatever ``self.engine_core.is_sleeping()`` returns.
    """
    return self.engine_core.is_sleeping()
def get_tokenizer_group(
self,
group_type: type[_G] = BaseTokenizerGroup,