[FEAT] Support resetting the prefix cache for a specified device (#15003)

This commit is contained in:
maobaolong
2025-03-20 01:54:41 +08:00
committed by GitHub
parent 61c7a1b856
commit 26dd972adb
15 changed files with 49 additions and 34 deletions

View File

@@ -20,6 +20,7 @@ from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer_group import (
BaseTokenizerGroup, init_tokenizer_from_configs)
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.output_processor import OutputProcessor
from vllm.v1.engine.parallel_sampling import ParentRequest
@@ -226,7 +227,7 @@ class LLMEngine:
def stop_profile(self):
    """Invoke ``profile(False)`` on the engine core.

    Going by this method's name, passing ``False`` turns profiling off;
    the call itself is a plain delegation and returns ``None``.
    """
    core = self.engine_core
    core.profile(False)
def reset_prefix_cache(self, device: Optional[Device] = None):
    """Reset the prefix cache by delegating to the engine core.

    Args:
        device: Optional device selector. It defaults to ``None`` so that
            existing zero-argument callers keep working.

    Note:
        This method calls ``self.engine_core.reset_prefix_cache()`` with
        no arguments, so ``device`` is accepted here but not forwarded.
        NOTE(review): confirm whether the engine core is expected to
        receive ``device``; its signature is not visible from this method.
    """
    self.engine_core.reset_prefix_cache()
def sleep(self, level: int = 1):