[Misc] Fix typos in scheduler.py (#7285)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
Rui Qiao
2024-08-07 17:06:01 -07:00
committed by GitHub
parent e53dfd3eaf
commit 746709642c

View File

@@ -678,7 +678,7 @@ class Scheduler:
all tokens.
Returns:
SchedulerSwappedInOutputs.
SchedulerPrefillOutputs.
"""
ignored_seq_groups: List[SequenceGroup] = []
seq_groups: List[SequenceGroup] = []
@@ -851,7 +851,7 @@ class Scheduler:
preempted=preempted,
)
def _schedule_chunked_prefill(self):
def _schedule_chunked_prefill(self) -> SchedulerOutputs:
"""Schedule queued requests.
Chunked prefill allows to chunk prefill requests, batch them together
@@ -862,7 +862,7 @@ class Scheduler:
The policy can sustain the high GPU utilization because it can put
prefill and decodes requests to the same batch, while it improves
inter token latency because decodes requests don't need to blocked
inter token latency because decodes requests don't need to be blocked
by prefill requests.
"""
budget = SchedulingBudget(