[Core] Support reset_prefix_cache (#12284)
This commit is contained in:
@@ -20,7 +20,7 @@ from vllm.v1.core.kv_cache_utils import get_kv_cache_config
|
||||
from vllm.v1.core.scheduler import Scheduler
|
||||
from vllm.v1.engine import (EngineCoreOutputs, EngineCoreProfile,
|
||||
EngineCoreRequest, EngineCoreRequestType,
|
||||
EngineCoreRequestUnion)
|
||||
EngineCoreRequestUnion, EngineCoreResetPrefixCache)
|
||||
from vllm.v1.engine.mm_input_mapper import MMInputMapperServer
|
||||
from vllm.v1.executor.abstract import Executor
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
@@ -135,6 +135,9 @@ class EngineCore:
|
||||
def profile(self, is_start: bool = True):
    """Forward a profiling toggle to the model executor.

    Args:
        is_start: Flag passed unchanged to ``self.model_executor.profile``.
            Defaults to ``True``.
    """
    executor = self.model_executor
    executor.profile(is_start)
|
||||
|
||||
def reset_prefix_cache(self):
    """Ask the scheduler to reset its prefix cache.

    Delegates to ``self.scheduler.reset_prefix_cache()``; nothing is
    returned to the caller.
    """
    scheduler = self.scheduler
    scheduler.reset_prefix_cache()
|
||||
|
||||
|
||||
class EngineCoreProc(EngineCore):
|
||||
"""ZMQ-wrapper for running EngineCore in background process."""
|
||||
@@ -247,6 +250,8 @@ class EngineCoreProc(EngineCore):
|
||||
self.add_request(request)
|
||||
elif isinstance(request, EngineCoreProfile):
|
||||
self.model_executor.profile(request.is_start)
|
||||
elif isinstance(request, EngineCoreResetPrefixCache):
|
||||
self.reset_prefix_cache()
|
||||
else:
|
||||
# TODO: make an EngineCoreAbort wrapper
|
||||
assert isinstance(request, list)
|
||||
@@ -271,7 +276,9 @@ class EngineCoreProc(EngineCore):
|
||||
request = decoder_add_req.decode(request_data)
|
||||
elif request_type == EngineCoreRequestType.ABORT.value:
|
||||
request = decoder_abort_req.decode(request_data)
|
||||
elif request_type == EngineCoreRequestType.PROFILE.value:
|
||||
elif request_type in (
|
||||
EngineCoreRequestType.PROFILE.value,
|
||||
EngineCoreRequestType.RESET_PREFIX_CACHE.value):
|
||||
request = pickle.loads(request_data)
|
||||
else:
|
||||
raise ValueError(f"Unknown RequestType: {request_type}")
|
||||
|
||||
Reference in New Issue
Block a user