[Optimization] Make new_block_ids None if empty (#23262)
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
@@ -574,11 +574,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
|
||||
  # Update the block IDs.
  if not resumed_from_preemption:
-     # Append the new blocks to the existing block IDs.
-     for block_ids, new_ids in zip(req_state.block_ids,
-                                   new_block_ids):
-         block_ids.extend(new_ids)
+     if new_block_ids is not None:
+         # Append the new blocks to the existing block IDs.
+         for block_ids, new_ids in zip(req_state.block_ids,
+                                       new_block_ids):
+             block_ids.extend(new_ids)
  else:
+     assert new_block_ids is not None
      # The request is resumed from preemption.
      # Replace the existing block IDs with the new ones.
      req_state.block_ids = new_block_ids
|
||||
@@ -594,7 +596,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
  # Update the persistent batch.
  self.input_batch.num_computed_tokens_cpu[req_index] = (
      num_computed_tokens)
- self.input_batch.block_table.append_row(new_block_ids, req_index)
+ if new_block_ids is not None:
+     self.input_batch.block_table.append_row(
+         new_block_ids, req_index)

  # For the last rank, we don't need to update the token_ids_cpu
  # because the sampled tokens are already cached.
|
||||
|
||||
Reference in New Issue
Block a user