Implement preemption via recomputation & Refactor scheduling logic (#12)

This commit is contained in:
Woosuk Kwon
2023-03-30 14:51:46 -07:00
committed by GitHub
parent 88c0268a18
commit 7a7929abe8
7 changed files with 277 additions and 124 deletions

View File

@@ -10,6 +10,7 @@ from cacheflow.worker.controller import Controller, DeviceID
from cacheflow.sequence import SequenceGroup
from cacheflow.sampling_params import SamplingParams
class Server:
def __init__(
self,
@@ -91,7 +92,7 @@ class Server:
return self.scheduler.step()
def has_unfinished_requests(self):
- return (self.scheduler.pending or self.scheduler.running or
+ return (self.scheduler.waiting or self.scheduler.running or
self.scheduler.swapped)