Implement preemption via recomputation & Refactor scheduling logic (#12)
This commit is contained in:
@@ -76,7 +76,8 @@ class BlockSpaceManager:
|
||||
self.block_tables: Dict[int, BlockTable] = {}
|
||||
|
||||
def can_allocate(self, seq_group: SequenceGroup) -> bool:
|
||||
# NOTE: Here we assume that all sequences in the group have the same prompt.
|
||||
# FIXME(woosuk): Here we assume that all sequences in the group share
|
||||
# the same prompt. This may not be true for preempted sequences.
|
||||
seq = seq_group.seqs[0]
|
||||
num_required_blocks = len(seq.logical_token_blocks)
|
||||
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
|
||||
|
||||
Reference in New Issue
Block a user