[Misc] Support passing multiple request ids at once to AsyncLLM.abort() (#22944)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import asyncio
|
||||
import time
|
||||
from collections.abc import AsyncGenerator, Mapping
|
||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||
from copy import copy
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
@@ -27,7 +27,8 @@ from vllm.transformers_utils.config import (
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import Device, cancel_task_threadsafe, cdiv, deprecate_kwargs
|
||||
from vllm.utils import (Device, as_list, cancel_task_threadsafe, cdiv,
|
||||
deprecate_kwargs)
|
||||
from vllm.v1.engine import EngineCoreRequest
|
||||
from vllm.v1.engine.core_client import EngineCoreClient
|
||||
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
||||
@@ -431,14 +432,16 @@ class AsyncLLM(EngineClient):
|
||||
|
||||
self.output_handler = asyncio.create_task(output_handler())
|
||||
|
||||
async def abort(self, request_id: str) -> None:
|
||||
async def abort(self, request_id: Union[str, Iterable[str]]) -> None:
|
||||
"""Abort RequestId in OutputProcessor and EngineCore."""
|
||||
|
||||
request_ids = self.output_processor.abort_requests((request_id, ))
|
||||
await self.engine_core.abort_requests_async(request_ids)
|
||||
request_ids = (request_id, ) if isinstance(
|
||||
request_id, str) else as_list(request_id)
|
||||
all_request_ids = self.output_processor.abort_requests(request_ids)
|
||||
await self.engine_core.abort_requests_async(all_request_ids)
|
||||
|
||||
if self.log_requests:
|
||||
logger.info("Aborted request %s.", request_id)
|
||||
logger.info("Aborted request(s) %s.", ",".join(request_ids))
|
||||
|
||||
async def encode(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user