[BugFix][V1] Fix parallel sampling finishing/aborts (#14512)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -298,9 +298,8 @@ class AsyncLLM(EngineClient):
|
||||
async def abort(self, request_id: str) -> None:
|
||||
"""Abort RequestId in OutputProcessor and EngineCore."""
|
||||
|
||||
request_ids = [request_id]
|
||||
request_ids = self.output_processor.abort_requests((request_id, ))
|
||||
await self.engine_core.abort_requests_async(request_ids)
|
||||
self.output_processor.abort_requests(request_ids)
|
||||
|
||||
if self.log_requests:
|
||||
logger.info("Aborted request %s.", request_id)
|
||||
|
||||
Reference in New Issue
Block a user