[Bugfix] Surface exceptions from non-blocking execute_model in UniProcExecutor to avoid DP deadlocks (#35194)
Signed-off-by: fangyuchu <fangyuchu@qq.com>
This commit is contained in:
@@ -443,6 +443,7 @@ class EngineCore:
         deferred_scheduler_output = None
         if self.scheduler.has_requests():
             scheduler_output = self.scheduler.schedule()
-            exec_future = self.model_executor.execute_model(
-                scheduler_output, non_block=True
-            )
+            with self.log_error_detail(scheduler_output):
+                exec_future = self.model_executor.execute_model(
+                    scheduler_output, non_block=True
+                )
@@ -100,12 +100,17 @@ class UniProcExecutor(Executor):
     def execute_model(  # type: ignore[override]
         self, scheduler_output: SchedulerOutput, non_block: bool = False
     ) -> ModelRunnerOutput | None | Future[ModelRunnerOutput | None]:
-        return self.collective_rpc(
+        output = self.collective_rpc(
             "execute_model",
             args=(scheduler_output,),
             non_block=non_block,
             single_value=True,
         )
+        # In non-blocking mode, surface any exception as early as possible.
+        if non_block and output.done():
+            # Raise the exception in-line if the task failed.
+            output.result()
+        return output
 
     def sample_tokens(  # type: ignore[override]
         self, grammar_output: GrammarOutput | None, non_block: bool = False
Reference in New Issue
Block a user