[FEAT]Support reset prefix cache by specified device (#15003)
This commit is contained in:
@@ -35,7 +35,7 @@ from vllm.sampling_params import SamplingParams
|
||||
from vllm.sequence import ExecuteModelRequest
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import deprecate_kwargs, weak_bind
|
||||
from vllm.utils import Device, deprecate_kwargs, weak_bind
|
||||
|
||||
logger = init_logger(__name__)
|
||||
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
|
||||
@@ -1216,8 +1216,9 @@ class AsyncLLMEngine(EngineClient):
|
||||
async def stop_profile(self) -> None:
|
||||
self.engine.stop_profile()
|
||||
|
||||
async def reset_prefix_cache(self) -> None:
|
||||
self.engine.reset_prefix_cache()
|
||||
async def reset_prefix_cache(self,
|
||||
device: Optional[Device] = None) -> None:
|
||||
self.engine.reset_prefix_cache(device)
|
||||
|
||||
async def sleep(self, level: int = 1) -> None:
|
||||
self.engine.sleep(level)
|
||||
|
||||
Reference in New Issue
Block a user