[FEAT] Support resetting the prefix cache for a specified device (#15003)

This commit is contained in:
maobaolong
2025-03-20 01:54:41 +08:00
committed by GitHub
parent 61c7a1b856
commit 26dd972adb
15 changed files with 49 additions and 34 deletions

View File

@@ -35,7 +35,7 @@ from vllm.sampling_params import SamplingParams
from vllm.sequence import ExecuteModelRequest
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.usage.usage_lib import UsageContext
from vllm.utils import deprecate_kwargs, weak_bind
from vllm.utils import Device, deprecate_kwargs, weak_bind
logger = init_logger(__name__)
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -1216,8 +1216,9 @@ class AsyncLLMEngine(EngineClient):
async def stop_profile(self) -> None:
self.engine.stop_profile()
async def reset_prefix_cache(self) -> None:
self.engine.reset_prefix_cache()
async def reset_prefix_cache(self,
device: Optional[Device] = None) -> None:
self.engine.reset_prefix_cache(device)
async def sleep(self, level: int = 1) -> None:
self.engine.sleep(level)