[Bugfix][Nixl] Fix full prefix cache hit bug (#18632)
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -424,11 +424,11 @@ class Scheduler(SchedulerInterface):
|
||||
# The request cannot be scheduled.
|
||||
break
|
||||
|
||||
# KVConnector: update internal state after allocation.
|
||||
# KVTransfer: the connector uses this info to determine
|
||||
# if a load is needed. Note that
|
||||
# This information is used to determine if a load is
|
||||
# needed for this request.
|
||||
if num_external_computed_tokens:
|
||||
assert self.connector is not None
|
||||
if self.connector is not None:
|
||||
self.connector.update_state_after_alloc(
|
||||
request,
|
||||
new_computed_blocks + new_blocks,
|
||||
@@ -841,7 +841,7 @@ class Scheduler(SchedulerInterface):
|
||||
}
|
||||
|
||||
finished_req_ids = self.finished_req_ids_dict
|
||||
if finished_req_ids is not None:
|
||||
if finished_req_ids:
|
||||
# Include ids of requests that finished since last outputs
|
||||
# were sent.
|
||||
for client_index, finished_set in finished_req_ids.items():
|
||||
|
||||
Reference in New Issue
Block a user