[Bugfix][V1] Fix bug from putting llm_engine.model_executor in a background process (#15367)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
This commit is contained in:
wwl2755
2025-04-03 02:32:10 -05:00
committed by GitHub
parent 5e125e74d1
commit 463bbb1835
5 changed files with 167 additions and 4 deletions

View File

@@ -285,6 +285,16 @@ class EngineCore:
def pin_lora(self, lora_id: int) -> bool:
return self.model_executor.pin_lora(lora_id)
def save_sharded_state(
self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None,
) -> None:
self.model_executor.save_sharded_state(path=path,
pattern=pattern,
max_size=max_size)
def collective_rpc(self,
method: Union[str, Callable[..., _R]],
timeout: Optional[float] = None,

View File

@@ -119,6 +119,12 @@ class EngineCoreClient(ABC):
def pin_lora(self, lora_id: int) -> bool:
raise NotImplementedError
def save_sharded_state(self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None) -> None:
raise NotImplementedError
def collective_rpc(self,
method: Union[str, Callable[..., _R]],
timeout: Optional[float] = None,
@@ -162,6 +168,12 @@ class EngineCoreClient(ABC):
async def pin_lora_async(self, lora_id: int) -> bool:
raise NotImplementedError
async def save_sharded_state_async(self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None) -> None:
raise NotImplementedError
async def collective_rpc_async(
self,
method: Union[str, Callable[..., _R]],
@@ -227,6 +239,12 @@ class InprocClient(EngineCoreClient):
def pin_lora(self, lora_id: int) -> bool:
return self.engine_core.pin_lora(lora_id)
def save_sharded_state(self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None) -> None:
self.engine_core.save_sharded_state(path, pattern, max_size)
def collective_rpc(self,
method: Union[str, Callable[..., _R]],
timeout: Optional[float] = None,
@@ -537,6 +555,12 @@ class SyncMPClient(MPClient):
return self.call_utility("collective_rpc", method, timeout, args,
kwargs)
def save_sharded_state(self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None) -> None:
self.call_utility("save_sharded_state", path, pattern, max_size)
class AsyncMPClient(MPClient):
"""Asyncio-compatible client for multi-proc EngineCore."""
@@ -668,6 +692,13 @@ class AsyncMPClient(MPClient):
async def pin_lora_async(self, lora_id: int) -> bool:
return await self.call_utility_async("pin_lora", lora_id)
async def save_sharded_state_async(self,
path: str,
pattern: Optional[str] = None,
max_size: Optional[int] = None) -> None:
await self.call_utility_async("save_sharded_state", path, pattern,
max_size)
async def collective_rpc_async(
self,
method: Union[str, Callable[..., _R]],