[RL] Add Pause and Resume Generation for Asynchronous RL Training (#28037)
Signed-off-by: SamitHuang <285365963@qq.com>
Signed-off-by: Samit <285365963@qq.com>
Signed-off-by: samithuang <285365963@qq.com>
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
@@ -149,6 +149,33 @@ class EngineClient(ABC):
|
||||
"""Load a new LoRA adapter into the engine for future requests."""
|
||||
...
|
||||
|
||||
@abstractmethod
async def pause_generation(
    self,
    *,
    wait_for_inflight_requests: bool = False,
    clear_cache: bool = True,
) -> None:
    """Stop the engine from taking on new generation/encoding requests.

    Args:
        wait_for_inflight_requests: If ``True``, requests that are already
            in flight are allowed to finish before the pause takes effect.
            If ``False`` (the default), in-flight requests are aborted
            immediately.
        clear_cache: Whether the KV and prefix caches are cleared once
            draining is done. Defaults to ``True``.
    """
    ...
|
||||
|
||||
@abstractmethod
async def resume_generation(self) -> None:
    """Start accepting generation/encoding requests again."""
    ...
|
||||
|
||||
@abstractmethod
async def is_paused(self) -> bool:
    """Report whether the engine is currently in the paused state.

    Returns:
        ``True`` if the engine is paused, ``False`` otherwise.
    """
    ...
|
||||
|
||||
async def scale_elastic_ep(
|
||||
self, new_data_parallel_size: int, drain_timeout: int = 300
|
||||
) -> None:
|
||||
|
||||
Reference in New Issue
Block a user