[V1][BugFix] Free encoder cache for aborted requests (#12545)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -38,7 +38,8 @@ class EncoderCacheManager:
|
||||
def get_cached_input_ids(self, request: Request) -> Set[int]:
    """Return the encoder input ids cached for ``request``.

    Looks the request up in ``self.cached`` by its ``request_id``. When the
    request has an entry, the stored set itself is returned (not a copy);
    otherwise a fresh empty set is returned and ``self.cached`` is left
    untouched.
    """
    req_id = request.request_id
    cached_ids = self.cached.get(req_id, set())
    return cached_ids
|
||||
|
||||
def free(self, request: Request, input_id: int) -> None:
|
||||
def free_encoder_input(self, request: Request, input_id: int) -> None:
|
||||
"""Free a single encoder input id for the request."""
|
||||
req_id = request.request_id
|
||||
if req_id not in self.cached:
|
||||
return
|
||||
@@ -49,6 +50,12 @@ class EncoderCacheManager:
|
||||
self.num_free_slots += request.get_num_encoder_tokens(input_id)
|
||||
self.freed.append((req_id, input_id))
|
||||
|
||||
def free(self, request: Request) -> None:
    """Free all cached input ids for the request.

    Every input id currently reported by ``get_cached_input_ids`` for
    ``request`` is released through ``free_encoder_input``. A request with
    no cached ids is a no-op.
    """
    # get_cached_input_ids() returns the set stored in self.cached, not a
    # copy. Iterate over a snapshot so that free_encoder_input() may remove
    # ids from that set without triggering
    # "RuntimeError: Set changed size during iteration".
    input_ids = tuple(self.get_cached_input_ids(request))
    for input_id in input_ids:
        self.free_encoder_input(request, input_id)
|
||||
|
||||
def get_freed_ids(self) -> List[Tuple[str, int]]:
|
||||
freed = self.freed
|
||||
self.freed = []
|
||||
|
||||
Reference in New Issue
Block a user