[Core] Asynchronous Output Processor (#7049)
Co-authored-by: Alexander Matveev <alexm@neuralmagic.com>
This commit is contained in:
@@ -64,8 +64,9 @@ class DistributedGPUExecutor(GPUExecutor):
|
||||
num_cpu_blocks=num_cpu_blocks)
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
|
||||
self,
|
||||
execute_model_req: ExecuteModelRequest,
|
||||
) -> List[SamplerOutput]:
|
||||
if self.parallel_worker_tasks is None:
|
||||
self.parallel_worker_tasks = self._run_workers(
|
||||
"start_worker_execution_loop",
|
||||
@@ -188,7 +189,7 @@ class DistributedGPUExecutorAsync(DistributedGPUExecutor, ExecutorAsyncBase):
|
||||
@abstractmethod
|
||||
async def _driver_execute_model_async(
|
||||
self,
|
||||
execute_model_req: Optional[ExecuteModelRequest] = None
|
||||
execute_model_req: Optional[ExecuteModelRequest] = None,
|
||||
) -> List[SamplerOutput]:
|
||||
"""Execute the model asynchronously in the driver worker.
|
||||
|
||||
|
||||
@@ -176,5 +176,5 @@ class GPUExecutorAsync(GPUExecutor, ExecutorAsyncBase):
|
||||
execute_model_req: ExecuteModelRequest,
|
||||
) -> List[Union[SamplerOutput, PoolerOutput]]:
|
||||
output = await make_async(self.driver_worker.execute_model
|
||||
)(execute_model_req=execute_model_req, )
|
||||
)(execute_model_req=execute_model_req)
|
||||
return output
|
||||
|
||||
Reference in New Issue
Block a user