[Core] Adding Priority Scheduling (#5958)

This commit is contained in:
Archit Patke
2024-09-24 21:50:50 -05:00
committed by GitHub
parent 01b6f9e1f0
commit 6da1ab6b41
6 changed files with 410 additions and 8 deletions

View File

@@ -961,7 +961,7 @@ class SchedulerConfig:
workers instead of the entire data. It should be enabled only
when SPMD worker architecture is enabled. I.e.,
VLLM_USE_RAY_SPMD_WORKER=1
policy: The scheduling policy to use. "fcfs" (default) or "priority".
"""
def __init__(self,
@@ -977,7 +977,8 @@ class SchedulerConfig:
preemption_mode: Optional[str] = None,
num_scheduler_steps: int = 1,
multi_step_stream_outputs: bool = False,
send_delta_data: bool = False) -> None:
send_delta_data: bool = False,
policy: str = "fcfs") -> None:
if max_num_batched_tokens is None:
if enable_chunked_prefill:
# These are the values that give the best balance between ITL
@@ -1019,6 +1020,7 @@ class SchedulerConfig:
self.num_scheduler_steps = num_scheduler_steps
self.multi_step_stream_outputs = multi_step_stream_outputs
self.send_delta_data = send_delta_data
self.policy = policy
self._verify_args()
def _verify_args(self) -> None: