[1/2][lmcache connector] clean up lmcache multi-process adapter (#31838)

Signed-off-by: ApostaC <yihua98@uchicago.edu>
This commit is contained in:
Yihua Cheng
2026-01-06 18:02:42 -08:00
committed by GitHub
parent 873480d133
commit 5b833be49e
2 changed files with 23 additions and 7 deletions

View File

@@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter:
kv_rank: The kv rank used for LMCache keys
vllm_block_size: The block size used in vLLM
"""
logger.warning(
"Importing LMCacheMPSchedulerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context)
# Request futures
@@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter:
"""
return self.blocks_in_chunk
def _cleanup_lookup_result(self, request_id: str) -> None:
def cleanup_lookup_result(self, request_id: str) -> None:
"""
Clean up the lookup future for a finished request to prevent a memory leak.
Args:
@@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter:
kv_rank: int,
vllm_block_size: int,
):
logger.warning(
"Importing LMCacheMPWorkerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context)
# Instance id for GPU worker

View File

@@ -17,16 +17,24 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorMetadata,
KVConnectorRole,
)
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.outputs import KVConnectorOutput
from vllm.v1.request import RequestStatus
from vllm.v1.utils import ConstantList
try:
from lmcache.integration.vllm.vllm_multi_process_adapter import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
except ImportError:
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
if TYPE_CHECKING:
from vllm.config import VllmConfig
from vllm.distributed.kv_events import KVCacheEvent
@@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
else LMCacheMPRequestState.READY
)
# Clean up lookup future in scheduler adapter
self.scheduler_adapter._cleanup_lookup_result(request.request_id)
self.scheduler_adapter.cleanup_lookup_result(request.request_id)
def build_connector_meta(
self, scheduler_output: SchedulerOutput