[FEAT] Support resetting the prefix cache for a specified device (#15003)
This commit is contained in:
@@ -20,6 +20,7 @@ from vllm.sampling_params import SamplingParams
|
||||
from vllm.transformers_utils.tokenizer_group import (
|
||||
BaseTokenizerGroup, init_tokenizer_from_configs)
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import Device
|
||||
from vllm.v1.engine.core_client import EngineCoreClient
|
||||
from vllm.v1.engine.output_processor import OutputProcessor
|
||||
from vllm.v1.engine.parallel_sampling import ParentRequest
|
||||
@@ -226,7 +227,7 @@ class LLMEngine:
|
||||
def stop_profile(self):
|
||||
self.engine_core.profile(False)
|
||||
|
||||
def reset_prefix_cache(self):
|
||||
def reset_prefix_cache(self, device: Optional[Device] = None):
|
||||
self.engine_core.reset_prefix_cache()
|
||||
|
||||
def sleep(self, level: int = 1):
|
||||
|
||||
Reference in New Issue
Block a user