[Misc] Support passing multiple request ids at once to AsyncLLM.abort() (#22944)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-08-15 17:00:36 -07:00
committed by GitHub
parent 236b864e4f
commit ad0297d113
6 changed files with 105 additions and 14 deletions

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import time
from collections.abc import AsyncGenerator, Mapping
from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy
from typing import Any, Optional, Union
@@ -27,7 +27,8 @@ from vllm.transformers_utils.config import (
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device, cancel_task_threadsafe, cdiv, deprecate_kwargs
from vllm.utils import (Device, as_list, cancel_task_threadsafe, cdiv,
deprecate_kwargs)
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
@@ -431,14 +432,16 @@ class AsyncLLM(EngineClient):
self.output_handler = asyncio.create_task(output_handler())
async def abort(self, request_id: str) -> None:
async def abort(self, request_id: Union[str, Iterable[str]]) -> None:
"""Abort RequestId in OutputProcessor and EngineCore."""
request_ids = self.output_processor.abort_requests((request_id, ))
await self.engine_core.abort_requests_async(request_ids)
request_ids = (request_id, ) if isinstance(
request_id, str) else as_list(request_id)
all_request_ids = self.output_processor.abort_requests(request_ids)
await self.engine_core.abort_requests_async(all_request_ids)
if self.log_requests:
logger.info("Aborted request %s.", request_id)
logger.info("Aborted request(s) %s.", ",".join(request_ids))
async def encode(
self,