diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py index 9db4dedf4..d865f70bd 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py @@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter: kv_rank: The kv rank used for LMCache keys vllm_block_size: The block size used in vLLM """ + logger.warning( + "Importing LMCacheMPSchedulerAdapter is deprecated. " + "Please update your LMCache to the latest version." + ) self.mq_client = MessageQueueClient(server_url, context) # Request futures @@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter: """ return self.blocks_in_chunk - def _cleanup_lookup_result(self, request_id: str) -> None: + def cleanup_lookup_result(self, request_id: str) -> None: """ Clean up lookup future for a finished request to prevent memory leak. Args: @@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter: kv_rank: int, vllm_block_size: int, ): + logger.warning( + "Importing LMCacheMPWorkerAdapter is deprecated. " + "Please update your LMCache to the latest version." + ) self.mq_client = MessageQueueClient(server_url, context) # Instance id for GPU worker diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py index 29166be62..9ebd2b1a3 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py @@ -17,16 +17,24 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import ( KVConnectorMetadata, KVConnectorRole, ) -from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import ( - LMCacheMPSchedulerAdapter, - LMCacheMPWorkerAdapter, - LoadStoreOp, -) from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.outputs import KVConnectorOutput from vllm.v1.request import RequestStatus from vllm.v1.utils import ConstantList +try: + from lmcache.integration.vllm.vllm_multi_process_adapter import ( + LMCacheMPSchedulerAdapter, + LMCacheMPWorkerAdapter, + LoadStoreOp, + ) +except ImportError: + from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import ( + LMCacheMPSchedulerAdapter, + LMCacheMPWorkerAdapter, + LoadStoreOp, + ) + if TYPE_CHECKING: from vllm.config import VllmConfig from vllm.distributed.kv_events import KVCacheEvent @@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1): else LMCacheMPRequestState.READY ) # Clean up lookup future in scheduler adapter - self.scheduler_adapter._cleanup_lookup_result(request.request_id) + self.scheduler_adapter.cleanup_lookup_result(request.request_id) def build_connector_meta( self, scheduler_output: SchedulerOutput