[FEAT] Support resetting the prefix cache for a specified device (#15003)
This commit is contained in:
@@ -18,7 +18,7 @@ from vllm.pooling_params import PoolingParams
|
||||
from vllm.prompt_adapter.request import PromptAdapterRequest
|
||||
from vllm.sampling_params import BeamSearchParams, SamplingParams
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
-from vllm.utils import collect_from_async_generator, random_uuid
|
||||
+from vllm.utils import Device, collect_from_async_generator, random_uuid
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -274,7 +274,8 @@ class EngineClient(ABC):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
-async def reset_prefix_cache(self) -> None:
|
||||
+async def reset_prefix_cache(self,
|
||||
+device: Optional[Device] = None) -> None:
|
||||
"""Reset the prefix cache"""
|
||||
...
|
||||
|
||||
|
||||
Reference in New Issue
Block a user