[1/2][lmcache connector] clean up lmcache multi-process adapter (#31838)
Signed-off-by: ApostaC <yihua98@uchicago.edu>
This commit is contained in:
@@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter:
|
||||
kv_rank: The kv rank used for LMCache keys
|
||||
vllm_block_size: The block size used in vLLM
|
||||
"""
|
||||
logger.warning(
|
||||
"Importing LMCacheMPSchedulerAdapter is deprecated. "
|
||||
"Please update your LMCache to the latest version."
|
||||
)
|
||||
self.mq_client = MessageQueueClient(server_url, context)
|
||||
|
||||
# Request futures
|
||||
@@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter:
|
||||
"""
|
||||
return self.blocks_in_chunk
|
||||
|
||||
def _cleanup_lookup_result(self, request_id: str) -> None:
|
||||
def cleanup_lookup_result(self, request_id: str) -> None:
|
||||
"""
|
||||
Clean up lookup future for a finished request to prevent memory leak.
|
||||
Args:
|
||||
@@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter:
|
||||
kv_rank: int,
|
||||
vllm_block_size: int,
|
||||
):
|
||||
logger.warning(
|
||||
"Importing LMCacheMPWorkerAdapter is deprecated. "
|
||||
"Please update your LMCache to the latest version."
|
||||
)
|
||||
self.mq_client = MessageQueueClient(server_url, context)
|
||||
|
||||
# Instance id for GPU worker
|
||||
|
||||
@@ -17,16 +17,24 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
KVConnectorMetadata,
|
||||
KVConnectorRole,
|
||||
)
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
|
||||
LMCacheMPSchedulerAdapter,
|
||||
LMCacheMPWorkerAdapter,
|
||||
LoadStoreOp,
|
||||
)
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.outputs import KVConnectorOutput
|
||||
from vllm.v1.request import RequestStatus
|
||||
from vllm.v1.utils import ConstantList
|
||||
|
||||
try:
|
||||
from lmcache.integration.vllm.vllm_multi_process_adapter import (
|
||||
LMCacheMPSchedulerAdapter,
|
||||
LMCacheMPWorkerAdapter,
|
||||
LoadStoreOp,
|
||||
)
|
||||
except ImportError:
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
|
||||
LMCacheMPSchedulerAdapter,
|
||||
LMCacheMPWorkerAdapter,
|
||||
LoadStoreOp,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.distributed.kv_events import KVCacheEvent
|
||||
@@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
||||
else LMCacheMPRequestState.READY
|
||||
)
|
||||
# Clean up lookup future in scheduler adapter
|
||||
self.scheduler_adapter._cleanup_lookup_result(request.request_id)
|
||||
self.scheduler_adapter.cleanup_lookup_result(request.request_id)
|
||||
|
||||
def build_connector_meta(
|
||||
self, scheduler_output: SchedulerOutput
|
||||
|
||||
Reference in New Issue
Block a user