[BugFix][V1] Fix parallel sampling finishing/aborts (#14512)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-03-12 13:29:48 -04:00
committed by GitHub
parent 916836bbfb
commit f5d3acd474
7 changed files with 137 additions and 113 deletions

View File

@@ -298,9 +298,8 @@ class AsyncLLM(EngineClient):
async def abort(self, request_id: str) -> None:
"""Abort RequestId in OutputProcessor and EngineCore."""
-request_ids = [request_id]
+request_ids = self.output_processor.abort_requests((request_id, ))
await self.engine_core.abort_requests_async(request_ids)
-self.output_processor.abort_requests(request_ids)
if self.log_requests:
logger.info("Aborted request %s.", request_id)