Move missed SchedulerConfig args into scheduler config group in EngineArgs (#17131)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -1778,6 +1778,7 @@ class ParallelConfig:
|
|||||||
"worker_extension_cls must be a string (qualified class name).")
|
"worker_extension_cls must be a string (qualified class name).")
|
||||||
|
|
||||||
|
|
||||||
|
PreemptionMode = Literal["swap", "recompute"]
|
||||||
SchedulerPolicy = Literal["fcfs", "priority"]
|
SchedulerPolicy = Literal["fcfs", "priority"]
|
||||||
|
|
||||||
|
|
||||||
@@ -1854,7 +1855,7 @@ class SchedulerConfig:
|
|||||||
NOTE: This is not currently configurable. It will be overridden by
|
NOTE: This is not currently configurable. It will be overridden by
|
||||||
max_num_batched_tokens in case max multimodal embedding size is larger."""
|
max_num_batched_tokens in case max multimodal embedding size is larger."""
|
||||||
|
|
||||||
preemption_mode: Optional[str] = None
|
preemption_mode: Optional[PreemptionMode] = None
|
||||||
"""Whether to perform preemption by swapping or
|
"""Whether to perform preemption by swapping or
|
||||||
recomputation. If not specified, we determine the mode as follows:
|
recomputation. If not specified, we determine the mode as follows:
|
||||||
We use recomputation by default since it incurs lower overhead than
|
We use recomputation by default since it incurs lower overhead than
|
||||||
|
|||||||
@@ -753,12 +753,6 @@ class EngineArgs:
|
|||||||
)
|
)
|
||||||
device_group.add_argument("--device", **device_kwargs["device"])
|
device_group.add_argument("--device", **device_kwargs["device"])
|
||||||
|
|
||||||
parser.add_argument('--num-scheduler-steps',
|
|
||||||
type=int,
|
|
||||||
default=1,
|
|
||||||
help=('Maximum number of forward steps per '
|
|
||||||
'scheduler call.'))
|
|
||||||
|
|
||||||
# Speculative arguments
|
# Speculative arguments
|
||||||
speculative_group = parser.add_argument_group(
|
speculative_group = parser.add_argument_group(
|
||||||
title="SpeculativeConfig",
|
title="SpeculativeConfig",
|
||||||
@@ -779,13 +773,6 @@ class EngineArgs:
|
|||||||
help="The pattern(s) to ignore when loading the model."
|
help="The pattern(s) to ignore when loading the model."
|
||||||
"Default to `original/**/*` to avoid repeated loading of llama's "
|
"Default to `original/**/*` to avoid repeated loading of llama's "
|
||||||
"checkpoints.")
|
"checkpoints.")
|
||||||
parser.add_argument(
|
|
||||||
'--preemption-mode',
|
|
||||||
type=str,
|
|
||||||
default=None,
|
|
||||||
help='If \'recompute\', the engine performs preemption by '
|
|
||||||
'recomputing; If \'swap\', the engine performs preemption by '
|
|
||||||
'block swapping.')
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--served-model-name",
|
"--served-model-name",
|
||||||
@@ -865,14 +852,18 @@ class EngineArgs:
|
|||||||
**scheduler_kwargs["num_lookahead_slots"])
|
**scheduler_kwargs["num_lookahead_slots"])
|
||||||
scheduler_group.add_argument('--scheduler-delay-factor',
|
scheduler_group.add_argument('--scheduler-delay-factor',
|
||||||
**scheduler_kwargs["delay_factor"])
|
**scheduler_kwargs["delay_factor"])
|
||||||
scheduler_group.add_argument(
|
scheduler_group.add_argument('--preemption-mode',
|
||||||
'--enable-chunked-prefill',
|
**scheduler_kwargs["preemption_mode"])
|
||||||
**scheduler_kwargs["enable_chunked_prefill"])
|
scheduler_group.add_argument('--num-scheduler-steps',
|
||||||
|
**scheduler_kwargs["num_scheduler_steps"])
|
||||||
scheduler_group.add_argument(
|
scheduler_group.add_argument(
|
||||||
'--multi-step-stream-outputs',
|
'--multi-step-stream-outputs',
|
||||||
**scheduler_kwargs["multi_step_stream_outputs"])
|
**scheduler_kwargs["multi_step_stream_outputs"])
|
||||||
scheduler_group.add_argument('--scheduling-policy',
|
scheduler_group.add_argument('--scheduling-policy',
|
||||||
**scheduler_kwargs["policy"])
|
**scheduler_kwargs["policy"])
|
||||||
|
scheduler_group.add_argument(
|
||||||
|
'--enable-chunked-prefill',
|
||||||
|
**scheduler_kwargs["enable_chunked_prefill"])
|
||||||
scheduler_group.add_argument(
|
scheduler_group.add_argument(
|
||||||
"--disable-chunked-mm-input",
|
"--disable-chunked-mm-input",
|
||||||
**scheduler_kwargs["disable_chunked_mm_input"])
|
**scheduler_kwargs["disable_chunked_mm_input"])
|
||||||
|
|||||||
Reference in New Issue
Block a user