Implement preemption via recomputation & Refactor scheduling logic (#12)

2023-03-30 14:51:46 -07:00
parent 88c0268a18
commit 7a7929abe8
7 changed files with 277 additions and 124 deletions
--- a/cacheflow/sequence.py
+++ b/cacheflow/sequence.py
@@ -7,7 +7,7 @@ from cacheflow.sampling_params import SamplingParams


 class SequenceStatus(enum.Enum):
-    PENDING = enum.auto()
+    WAITING = enum.auto()
    RUNNING = enum.auto()
    SWAPPED = enum.auto()
    FINISHED = enum.auto()
@@ -28,7 +28,7 @@ class Sequence:
        # Initialize the logical token blocks with the given token ids.
        self.add(token_ids)

-        self.status = SequenceStatus.PENDING
+        self.status = SequenceStatus.WAITING
        self.output_logprobs: List[Dict[int, float]] = []
        self.cumulative_logprobs = 0.0

@@ -88,9 +88,11 @@ class SequenceGroup:
        self,
        group_id: int,
        seqs: List[Sequence],
+        arrival_time: float,
    ) -> None:
        self.group_id = group_id
        self.seqs = seqs
+        self.arrival_time = arrival_time

    def get_seqs(
        self,