[V1] Simplify GPUModelRunner._update_states check (#13265)
This commit is contained in:
@@ -347,6 +347,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
self.input_batch.block_table.append_row(req_index, start_index,
|
self.input_batch.block_table.append_row(req_index, start_index,
|
||||||
req_data.new_block_ids)
|
req_data.new_block_ids)
|
||||||
|
|
||||||
|
batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0
|
||||||
|
|
||||||
# Add the new or resumed requests to the persistent batch.
|
# Add the new or resumed requests to the persistent batch.
|
||||||
# The smaller empty indices are filled first.
|
# The smaller empty indices are filled first.
|
||||||
removed_req_indices = sorted(removed_req_indices, reverse=True)
|
removed_req_indices = sorted(removed_req_indices, reverse=True)
|
||||||
@@ -363,8 +365,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
# Condense the batched states if there are empty indices.
|
# Condense the batched states if there are empty indices.
|
||||||
if removed_req_indices:
|
if removed_req_indices:
|
||||||
self.input_batch.condense(removed_req_indices)
|
self.input_batch.condense(removed_req_indices)
|
||||||
return (len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
|
|
||||||
or len(scheduler_output.finished_req_ids) > 0)
|
return batch_changed
|
||||||
|
|
||||||
def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
|
def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
|
||||||
total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
|
total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
|
||||||
|
|||||||
Reference in New Issue
Block a user