[n-gen] DO NOT repeatedly return finished child requests (#28591)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-12 19:36:07 -08:00
committed by GitHub
parent 97d1c99302
commit a1d3866dda
2 changed files with 115 additions and 3 deletions

View File

@@ -97,12 +97,21 @@ class ParentRequest:
child_request_id: str,
completion_output: CompletionOutput,
) -> tuple[str, list[CompletionOutput], bool]:
already_finished_and_returned: bool = False
if completion_output.finished():
self.child_requests.remove(child_request_id)
if child_request_id in self.child_requests:
self.child_requests.remove(child_request_id)
else:
# The child request ID is no longer in child_requests,
# which means the request already finished in a previous
# batch step and was returned to the client earlier.
already_finished_and_returned = True
if self.sampling_params.output_kind != RequestOutputKind.FINAL_ONLY:
# If streaming, just return the current output.
outputs = [completion_output]
# If streaming, just return the current output
#
# DO NOT output a finished, already-returned child request to the client again
outputs = [] if already_finished_and_returned else [completion_output]
else:
# If not streaming, aggregate the n final outputs.
self.output_aggregator[completion_output.index] = completion_output