Implement preemption via recomputation & Refactor scheduling logic (#12)

This commit is contained in:
Woosuk Kwon
2023-03-30 14:51:46 -07:00
committed by GitHub
parent 88c0268a18
commit 7a7929abe8
7 changed files with 277 additions and 124 deletions

View File

@@ -76,7 +76,8 @@ class BlockSpaceManager:
self.block_tables: Dict[int, BlockTable] = {}
def can_allocate(self, seq_group: SequenceGroup) -> bool:
# NOTE: Here we assume that all sequences in the group have the same prompt.
# FIXME(woosuk): Here we assume that all sequences in the group share
# the same prompt. This may not be true for preempted sequences.
seq = seq_group.seqs[0]
num_required_blocks = len(seq.logical_token_blocks)
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()