Implement preemption via recomputation & Refactor scheduling logic (#12)

This commit is contained in:
Woosuk Kwon
2023-03-30 14:51:46 -07:00
committed by GitHub
parent 88c0268a18
commit 7a7929abe8
7 changed files with 277 additions and 124 deletions

View File

@@ -7,7 +7,7 @@ from cacheflow.sampling_params import SamplingParams
class SequenceStatus(enum.Enum):
- PENDING = enum.auto()
+ WAITING = enum.auto()
RUNNING = enum.auto()
SWAPPED = enum.auto()
FINISHED = enum.auto()
@@ -28,7 +28,7 @@ class Sequence:
# Initialize the logical token blocks with the given token ids.
self.add(token_ids)
- self.status = SequenceStatus.PENDING
+ self.status = SequenceStatus.WAITING
self.output_logprobs: List[Dict[int, float]] = []
self.cumulative_logprobs = 0.0
@@ -88,9 +88,11 @@ class SequenceGroup:
self,
group_id: int,
seqs: List[Sequence],
+ arrival_time: float,
) -> None:
self.group_id = group_id
self.seqs = seqs
+ self.arrival_time = arrival_time
def get_seqs(
self,