[Bugfix] Properly abort pooling request. (#25734)

Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
wang.yuqi
2025-09-26 20:47:34 +08:00
committed by GitHub
parent 2827b3f4a3
commit fe6b19c314
2 changed files with 41 additions and 1 deletions

View File

@@ -335,7 +335,14 @@ class OutputProcessor:
# Produce final abort output.
if req_state.queue is not None and (
request_output := req_state.make_request_output(
[], None, FinishReason.ABORT, None, None)):
new_token_ids=[],
# Set pooling_output to a non-None value to
# correctly enter the abort pooling branch
pooling_output=torch.randn(0, device="cpu")
if req_state.detokenizer is None else None,
finish_reason=FinishReason.ABORT,
stop_reason=None,
kv_transfer_params=None)):
req_state.queue.put(request_output)
elif parent := self.parent_requests.get(request_id):
# Abort children prior to removing the parent.