[Chunked Prefill][4/n] Chunked prefill scheduler. (#3853)

This commit is contained in:
SangBin Cho
2024-04-06 02:17:58 +09:00
committed by GitHub
parent 1d7c940d74
commit 18de883489
10 changed files with 1217 additions and 182 deletions

View File

@@ -607,11 +607,10 @@ class LLMEngine:
now = time.time()
# Update the scheduled sequence groups with the model outputs.
scheduled_seq_groups = scheduler_outputs.scheduled_seq_groups
for scheduled_seq_group, outputs in zip(scheduled_seq_groups, output):
seq_group = scheduled_seq_group.seq_group
-token_chunk_size = scheduled_seq_group.token_chunk_size
-seq_group.update_num_computed_tokens(token_chunk_size)
+seq_group.update_num_computed_tokens(
+scheduled_seq_group.token_chunk_size)
self._process_sequence_group_outputs(seq_group, outputs)
# Free the finished sequence groups.