[1/2][lmcache connector] clean up lmcache multi-process adapter (#31838)
Signed-off-by: ApostaC <yihua98@uchicago.edu>
This commit is contained in:
@@ -95,6 +95,10 @@ class LMCacheMPSchedulerAdapter:
|
|||||||
kv_rank: The kv rank used for LMCache keys
|
kv_rank: The kv rank used for LMCache keys
|
||||||
vllm_block_size: The block size used in vLLM
|
vllm_block_size: The block size used in vLLM
|
||||||
"""
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"Importing LMCacheMPSchedulerAdapter is deprecated. "
|
||||||
|
"Please update your LMCache to the latest version."
|
||||||
|
)
|
||||||
self.mq_client = MessageQueueClient(server_url, context)
|
self.mq_client = MessageQueueClient(server_url, context)
|
||||||
|
|
||||||
# Request futures
|
# Request futures
|
||||||
@@ -147,7 +151,7 @@ class LMCacheMPSchedulerAdapter:
|
|||||||
"""
|
"""
|
||||||
return self.blocks_in_chunk
|
return self.blocks_in_chunk
|
||||||
|
|
||||||
def _cleanup_lookup_result(self, request_id: str) -> None:
|
def cleanup_lookup_result(self, request_id: str) -> None:
|
||||||
"""
|
"""
|
||||||
Clean up lookup future for a finished request to prevent memory leak.
|
Clean up lookup future for a finished request to prevent memory leak.
|
||||||
Args:
|
Args:
|
||||||
@@ -176,6 +180,10 @@ class LMCacheMPWorkerAdapter:
|
|||||||
kv_rank: int,
|
kv_rank: int,
|
||||||
vllm_block_size: int,
|
vllm_block_size: int,
|
||||||
):
|
):
|
||||||
|
logger.warning(
|
||||||
|
"Importing LMCacheMPWorkerAdapter is deprecated. "
|
||||||
|
"Please update your LMCache to the latest version."
|
||||||
|
)
|
||||||
self.mq_client = MessageQueueClient(server_url, context)
|
self.mq_client = MessageQueueClient(server_url, context)
|
||||||
|
|
||||||
# Instance id for GPU worker
|
# Instance id for GPU worker
|
||||||
|
|||||||
@@ -17,16 +17,24 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
|||||||
KVConnectorMetadata,
|
KVConnectorMetadata,
|
||||||
KVConnectorRole,
|
KVConnectorRole,
|
||||||
)
|
)
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
|
|
||||||
LMCacheMPSchedulerAdapter,
|
|
||||||
LMCacheMPWorkerAdapter,
|
|
||||||
LoadStoreOp,
|
|
||||||
)
|
|
||||||
from vllm.v1.core.sched.output import SchedulerOutput
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
from vllm.v1.outputs import KVConnectorOutput
|
from vllm.v1.outputs import KVConnectorOutput
|
||||||
from vllm.v1.request import RequestStatus
|
from vllm.v1.request import RequestStatus
|
||||||
from vllm.v1.utils import ConstantList
|
from vllm.v1.utils import ConstantList
|
||||||
|
|
||||||
|
try:
|
||||||
|
from lmcache.integration.vllm.vllm_multi_process_adapter import (
|
||||||
|
LMCacheMPSchedulerAdapter,
|
||||||
|
LMCacheMPWorkerAdapter,
|
||||||
|
LoadStoreOp,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
|
||||||
|
LMCacheMPSchedulerAdapter,
|
||||||
|
LMCacheMPWorkerAdapter,
|
||||||
|
LoadStoreOp,
|
||||||
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.distributed.kv_events import KVCacheEvent
|
from vllm.distributed.kv_events import KVCacheEvent
|
||||||
@@ -702,7 +710,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
|||||||
else LMCacheMPRequestState.READY
|
else LMCacheMPRequestState.READY
|
||||||
)
|
)
|
||||||
# Clean up lookup future in scheduler adapter
|
# Clean up lookup future in scheduler adapter
|
||||||
self.scheduler_adapter._cleanup_lookup_result(request.request_id)
|
self.scheduler_adapter.cleanup_lookup_result(request.request_id)
|
||||||
|
|
||||||
def build_connector_meta(
|
def build_connector_meta(
|
||||||
self, scheduler_output: SchedulerOutput
|
self, scheduler_output: SchedulerOutput
|
||||||
|
|||||||
Reference in New Issue
Block a user