[1/2][lmcache connector] clean up lmcache multi-process adapter (#31838)

Signed-off-by: ApostaC <yihua98@uchicago.edu>
This commit is contained in:
Yihua Cheng
2026-01-06 18:02:42 -08:00
committed by GitHub
parent 873480d133
commit 5b833be49e
2 changed files with 23 additions and 7 deletions

View File

@@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter:
kv_rank: The kv rank used for LMCache keys kv_rank: The kv rank used for LMCache keys
vllm_block_size: The block size used in vLLM vllm_block_size: The block size used in vLLM
""" """
logger.warning(
"Importing LMCacheMPSchedulerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context) self.mq_client = MessageQueueClient(server_url, context)
# Request futures # Request futures
@@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter:
""" """
return self.blocks_in_chunk return self.blocks_in_chunk
def _cleanup_lookup_result(self, request_id: str) -> None: def cleanup_lookup_result(self, request_id: str) -> None:
""" """
Clean up lookup future for a finished request to prevent memory leak. Clean up lookup future for a finished request to prevent memory leak.
Args: Args:
@@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter:
kv_rank: int, kv_rank: int,
vllm_block_size: int, vllm_block_size: int,
): ):
logger.warning(
"Importing LMCacheMPWorkerAdapter is deprecated. "
"Please update your LMCache to the latest version."
)
self.mq_client = MessageQueueClient(server_url, context) self.mq_client = MessageQueueClient(server_url, context)
# Instance id for GPU worker # Instance id for GPU worker

View File

@@ -17,16 +17,24 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorMetadata, KVConnectorMetadata,
KVConnectorRole, KVConnectorRole,
) )
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.outputs import KVConnectorOutput from vllm.v1.outputs import KVConnectorOutput
from vllm.v1.request import RequestStatus from vllm.v1.request import RequestStatus
from vllm.v1.utils import ConstantList from vllm.v1.utils import ConstantList
try:
from lmcache.integration.vllm.vllm_multi_process_adapter import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
except ImportError:
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
LMCacheMPSchedulerAdapter,
LMCacheMPWorkerAdapter,
LoadStoreOp,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.distributed.kv_events import KVCacheEvent from vllm.distributed.kv_events import KVCacheEvent
@@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
else LMCacheMPRequestState.READY else LMCacheMPRequestState.READY
) )
# Clean up lookup future in scheduler adapter # Clean up lookup future in scheduler adapter
self.scheduler_adapter._cleanup_lookup_result(request.request_id) self.scheduler_adapter.cleanup_lookup_result(request.request_id)
def build_connector_meta( def build_connector_meta(
self, scheduler_output: SchedulerOutput self, scheduler_output: SchedulerOutput