[Core] Adding Priority Scheduling (#5958)
This commit is contained in:
@@ -961,7 +961,7 @@ class SchedulerConfig:
|
||||
workers instead of the entire data. It should be enabled only
|
||||
when SPMD worker architecture is enabled. I.e.,
|
||||
VLLM_USE_RAY_SPMD_WORKER=1
|
||||
|
||||
policy: The scheduling policy to use. "fcfs" (default) or "priority".
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -977,7 +977,8 @@ class SchedulerConfig:
|
||||
preemption_mode: Optional[str] = None,
|
||||
num_scheduler_steps: int = 1,
|
||||
multi_step_stream_outputs: bool = False,
|
||||
send_delta_data: bool = False) -> None:
|
||||
send_delta_data: bool = False,
|
||||
policy: str = "fcfs") -> None:
|
||||
if max_num_batched_tokens is None:
|
||||
if enable_chunked_prefill:
|
||||
# It is the values that have the best balance between ITL
|
||||
@@ -1019,6 +1020,7 @@ class SchedulerConfig:
|
||||
self.num_scheduler_steps = num_scheduler_steps
|
||||
self.multi_step_stream_outputs = multi_step_stream_outputs
|
||||
self.send_delta_data = send_delta_data
|
||||
self.policy = policy
|
||||
self._verify_args()
|
||||
|
||||
def _verify_args(self) -> None:
|
||||
|
||||
Reference in New Issue
Block a user