Implement preemption via recomputation & Refactor scheduling logic (#12)

This commit is contained in:
Woosuk Kwon
2023-03-30 14:51:46 -07:00
committed by GitHub
parent 88c0268a18
commit 7a7929abe8
7 changed files with 277 additions and 124 deletions

View File

@@ -84,8 +84,9 @@ class FastAPIFrontend:
 seq = Sequence(seq_id, token_ids, block_size=self.block_size)
 seqs.append(seq)
+arrival_time = time.time()
 group_id = next(self.seq_group_counter)
-seq_group = SequenceGroup(group_id, seqs)
+seq_group = SequenceGroup(group_id, seqs, arrival_time)
 group_event = asyncio.Event()
 self.sequence_group_events[group_id] = group_event
 await self.server.add_sequence_groups.remote([(seq_group, sampling_params)])