[1/2][lmcache connector] clean up lmcache multi-process adapter (#31838)

Signed-off-by: ApostaC <yihua98@uchicago.edu>
This commit is contained in:
Yihua Cheng
2026-01-06 18:02:42 -08:00
committed by GitHub
parent 873480d133
commit 5b833be49e
2 changed files with 23 additions and 7 deletions

View File

@@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter:
kv_rank: The kv rank used for LMCache keys kv_rank: The kv rank used for LMCache keys
vllm_block_size: The block size used in vLLM vllm_block_size: The block size used in vLLM
""" """
logger.warning(
"Importing LMCacheMPSchedulerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context) self.mq_client = MessageQueueClient(server_url, context)
# Request futures # Request futures
@@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter:
""" """
return self.blocks_in_chunk return self.blocks_in_chunk
def _cleanup_lookup_result(self, request_id: str) -> None: def cleanup_lookup_result(self, request_id: str) -> None:
""" """
Clean up lookup future for a finished request to prevent memory leak. Clean up lookup future for a finished request to prevent memory leak.
Args: Args:
@@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter:
kv_rank: int, kv_rank: int,
vllm_block_size: int, vllm_block_size: int,
): ):
logger.warning(
"Importing LMCacheMPWorkerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context) self.mq_client = MessageQueueClient(server_url, context)
# Instance id for GPU worker # Instance id for GPU worker

View File

@@ -17,15 +17,23 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorMetadata, KVConnectorMetadata,
KVConnectorRole, KVConnectorRole,
) )
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.outputs import KVConnectorOutput
from vllm.v1.request import RequestStatus
from vllm.v1.utils import ConstantList
try:
from lmcache.integration.vllm.vllm_multi_process_adapter import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
except ImportError:
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import ( from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
LMCacheMPSchedulerAdapter, LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter, LMCacheMPWorkerAdapter,
LoadStoreOp, LoadStoreOp,
) )
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.outputs import KVConnectorOutput
from vllm.v1.request import RequestStatus
from vllm.v1.utils import ConstantList
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.config import VllmConfig from vllm.config import VllmConfig
@@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
else LMCacheMPRequestState.READY else LMCacheMPRequestState.READY
) )
# Clean up lookup future in scheduler adapter # Clean up lookup future in scheduler adapter
self.scheduler_adapter._cleanup_lookup_result(request.request_id) self.scheduler_adapter.cleanup_lookup_result(request.request_id)
def build_connector_meta( def build_connector_meta(
self, scheduler_output: SchedulerOutput self, scheduler_output: SchedulerOutput