[Core] Refactor Worker and ModelRunner to consolidate control plane communication (#5408)
Signed-off-by: Stephanie Wang <swang@cs.berkeley.edu> Signed-off-by: Stephanie <swang@anyscale.com> Co-authored-by: Stephanie <swang@anyscale.com>
This commit is contained in:
@@ -64,8 +64,8 @@ class DistributedGPUExecutor(GPUExecutor):
|
||||
num_cpu_blocks=num_cpu_blocks)
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
|
||||
self, execute_model_req: ExecuteModelRequest
|
||||
) -> Optional[List[SamplerOutput]]:
|
||||
if self.parallel_worker_tasks is None:
|
||||
self.parallel_worker_tasks = self._run_workers(
|
||||
"start_worker_execution_loop",
|
||||
@@ -79,7 +79,7 @@ class DistributedGPUExecutor(GPUExecutor):
|
||||
if self.parallel_worker_tasks is None:
|
||||
return
|
||||
|
||||
self._driver_execute_model()
|
||||
self._driver_execute_model(execute_model_req=None)
|
||||
parallel_worker_tasks = self.parallel_worker_tasks
|
||||
self.parallel_worker_tasks = None
|
||||
# Ensure that workers exit model loop cleanly
|
||||
@@ -123,13 +123,13 @@ class DistributedGPUExecutor(GPUExecutor):
|
||||
|
||||
@abstractmethod
|
||||
def _driver_execute_model(
|
||||
self,
|
||||
execute_model_req: Optional[ExecuteModelRequest] = None
|
||||
) -> List[SamplerOutput]:
|
||||
self, execute_model_req: Optional[ExecuteModelRequest]
|
||||
) -> Optional[List[SamplerOutput]]:
|
||||
"""Run execute_model in the driver worker.
|
||||
|
||||
Passing None will cause the driver to stop the model execution
|
||||
loop running in each of the remote workers.
|
||||
Passing None will cause the driver to stop the model execution loop
|
||||
running in each of the remote workers. In this case, this method
|
||||
returns None. Otherwise, this method returns the model output.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -69,8 +69,8 @@ class ExecutorBase(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def execute_model(
|
||||
self,
|
||||
execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
|
||||
self, execute_model_req: ExecuteModelRequest
|
||||
) -> Optional[List[SamplerOutput]]:
|
||||
"""Executes at least one model step on the given sequences."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ class GPUExecutor(ExecutorBase):
|
||||
|
||||
def execute_model(
|
||||
self, execute_model_req: ExecuteModelRequest
|
||||
) -> List[Union[SamplerOutput, PoolerOutput]]:
|
||||
) -> Optional[List[Union[SamplerOutput, PoolerOutput]]]:
|
||||
output = self.driver_worker.execute_model(execute_model_req)
|
||||
return output
|
||||
|
||||
|
||||
@@ -78,16 +78,14 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
|
||||
worker_monitor.close()
|
||||
|
||||
def _driver_execute_model(
|
||||
self,
|
||||
execute_model_req: Optional[ExecuteModelRequest] = None
|
||||
) -> List[SamplerOutput]:
|
||||
self, execute_model_req: Optional[ExecuteModelRequest]
|
||||
) -> Optional[List[SamplerOutput]]:
|
||||
"""Run execute_model in the driver worker.
|
||||
|
||||
Passing None will cause the driver to stop the model execution
|
||||
loop running in each of the remote workers.
|
||||
"""
|
||||
return self.driver_worker.execute_model(
|
||||
execute_model_req=execute_model_req)
|
||||
return self.driver_worker.execute_model(execute_model_req)
|
||||
|
||||
def _run_workers(
|
||||
self,
|
||||
|
||||
@@ -55,8 +55,7 @@ class NeuronExecutor(ExecutorBase):
|
||||
assert execute_model_req.num_lookahead_slots == 0, (
|
||||
"lookahead not supported for Neuron backend.")
|
||||
|
||||
output = self.driver_worker.execute_model(
|
||||
execute_model_req.seq_group_metadata_list)
|
||||
output = self.driver_worker.execute_model(execute_model_req)
|
||||
return output
|
||||
|
||||
def add_lora(self, lora_request: LoRARequest) -> bool:
|
||||
|
||||
@@ -190,9 +190,8 @@ class RayGPUExecutor(DistributedGPUExecutor):
|
||||
max_parallel_loading_workers)
|
||||
|
||||
def _driver_execute_model(
|
||||
self,
|
||||
execute_model_req: Optional[ExecuteModelRequest] = None
|
||||
) -> List[SamplerOutput]:
|
||||
self, execute_model_req: Optional[ExecuteModelRequest]
|
||||
) -> Optional[List[SamplerOutput]]:
|
||||
"""Run execute_model in the driver worker.
|
||||
|
||||
Passing None will cause the driver to stop the model execution
|
||||
|
||||
Reference in New Issue
Block a user