From 63298ee17350e4eda3f574eab16286bc405b23a6 Mon Sep 17 00:00:00 2001 From: Roy Huang Date: Sat, 7 Mar 2026 13:52:35 -0800 Subject: [PATCH] [Bugfix][LMCache][KVConnector] fix potential memory leak in LMCache multiprocess mode (#35931) --- .../kv_connector/v1/lmcache_mp_connector.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py index fc31836aa..db1d34ca1 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py @@ -721,6 +721,34 @@ class LMCacheMPConnector(KVConnectorBase_V1): # Clean up lookup future in scheduler adapter self.scheduler_adapter.cleanup_lookup_result(request.request_id) + # Free locks on chunks that vLLM already computed and won't + # retrieve from LMCache. + if tracker.num_lmcache_hit_blocks > 0: + if not condition: + # No retrieve needed — free ALL locked chunks + free_end = tracker.num_lmcache_hit_blocks * self.vllm_block_size + else: + # Note(Roy): Boundary misalignment between vLLM blocks and LMCache + # blocks is handled in free_lookup_locks. It makes sure that if + # the last vLLM-computed block ends in the middle of an LMCache + # block, that final LMCache block is not freed (i.e., floor division) + # since it will still be needed by vLLM, and that block's lock will + # be freed by vLLM's retrieve. + free_end = tracker.num_vllm_hit_blocks * self.vllm_block_size + + if free_end > 0: + self.scheduler_adapter.free_lookup_locks( + token_ids=list(tracker.all_token_ids), + start=0, + end=free_end, + request_id=request.request_id, + ) + logger.debug( + "Free locks of tokens %d-%d since it is cached by vLLM.", + 0, + free_end, + ) + def build_connector_meta( self, scheduler_output: SchedulerOutput ) -> KVConnectorMetadata: