[Bugfix][Nixl] Fix full prefix cache hit bug (#18632)

Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Robert Shaw
2025-06-04 22:07:32 -04:00
committed by GitHub
parent 78dcf56cb3
commit c56ed8bb0e
4 changed files with 97 additions and 81 deletions

View File

@@ -424,11 +424,11 @@ class Scheduler(SchedulerInterface):
# The request cannot be scheduled.
break
# KVConnector: update internal state after allocation.
# KVTransfer: the connector uses this info to determine
# if a load is needed. Note that
# This information is used to determine if a load is
# needed for this request.
if num_external_computed_tokens:
assert self.connector is not None
if self.connector is not None:
self.connector.update_state_after_alloc(
request,
new_computed_blocks + new_blocks,
@@ -841,7 +841,7 @@ class Scheduler(SchedulerInterface):
}
finished_req_ids = self.finished_req_ids_dict
if finished_req_ids is not None:
if finished_req_ids:
# Include ids of requests that finished since last outputs
# were sent.
for client_index, finished_set in finished_req_ids.items():