[FEAT] Support resetting the prefix cache for a specified device (#15003)

This commit is contained in:
maobaolong
2025-03-20 01:54:41 +08:00
committed by GitHub
parent 61c7a1b856
commit 26dd972adb
15 changed files with 49 additions and 34 deletions

View File

@@ -18,7 +18,7 @@ from vllm.pooling_params import PoolingParams
from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import collect_from_async_generator, random_uuid
from vllm.utils import Device, collect_from_async_generator, random_uuid
logger = init_logger(__name__)
@@ -274,7 +274,8 @@ class EngineClient(ABC):
...
@abstractmethod
async def reset_prefix_cache(self,
                             device: Optional[Device] = None) -> None:
    """Reset the prefix cache.

    Args:
        device: Optional device whose prefix cache should be reset.
            Defaults to ``None``, so callers that pass no argument
            keep working. NOTE(review): how ``None`` is interpreted
            is up to the concrete implementation -- presumably "no
            specific device was requested"; confirm against the
            implementations.
    """
    ...