[Feature] Pluggable platform-specific scheduler (#13161)
Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
This commit is contained in:
@@ -192,6 +192,7 @@ class EngineArgs:
|
||||
collect_detailed_traces: Optional[str] = None
|
||||
disable_async_output_proc: bool = False
|
||||
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"
|
||||
scheduler_cls: Union[str, Type[object]] = "vllm.core.scheduler.Scheduler"
|
||||
|
||||
override_neuron_config: Optional[Dict[str, Any]] = None
|
||||
override_pooler_config: Optional[PoolerConfig] = None
|
||||
@@ -938,6 +939,13 @@ class EngineArgs:
|
||||
'priority (lower value means earlier handling) and time of '
|
||||
'arrival deciding any ties).')
|
||||
|
||||
parser.add_argument(
|
||||
'--scheduler-cls',
|
||||
default=EngineArgs.scheduler_cls,
|
||||
help='The scheduler class to use. "vllm.core.scheduler.Scheduler" '
|
||||
'is the default scheduler. Can be a class directly or the path to '
|
||||
'a class of form "mod.custom_class".')
|
||||
|
||||
parser.add_argument(
|
||||
'--override-neuron-config',
|
||||
type=json.loads,
|
||||
@@ -1273,10 +1281,12 @@ class EngineArgs:
|
||||
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
|
||||
and parallel_config.use_ray),
|
||||
policy=self.scheduling_policy,
|
||||
scheduler_cls=self.scheduler_cls,
|
||||
max_num_partial_prefills=self.max_num_partial_prefills,
|
||||
max_long_partial_prefills=self.max_long_partial_prefills,
|
||||
long_prefill_token_threshold=self.long_prefill_token_threshold,
|
||||
)
|
||||
|
||||
lora_config = LoRAConfig(
|
||||
bias_enabled=self.enable_lora_bias,
|
||||
max_lora_rank=self.max_lora_rank,
|
||||
|
||||
Reference in New Issue
Block a user