[Chunked Prefill][4/n] Chunked prefill scheduler. (#3853)
This commit is contained in:
@@ -607,11 +607,10 @@ class LLMEngine:
|
||||
now = time.time()
|
||||
# Update the scheduled sequence groups with the model outputs.
|
||||
scheduled_seq_groups = scheduler_outputs.scheduled_seq_groups
|
||||
|
||||
for scheduled_seq_group, outputs in zip(scheduled_seq_groups, output):
|
||||
seq_group = scheduled_seq_group.seq_group
|
||||
token_chunk_size = scheduled_seq_group.token_chunk_size
|
||||
seq_group.update_num_computed_tokens(token_chunk_size)
|
||||
seq_group.update_num_computed_tokens(
|
||||
scheduled_seq_group.token_chunk_size)
|
||||
self._process_sequence_group_outputs(seq_group, outputs)
|
||||
|
||||
# Free the finished sequence groups.
|
||||
|
||||
Reference in New Issue
Block a user