[V0 Deprecation] Remove async_output_proc, preemption mode, delay factor (#25334)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
@@ -409,9 +409,7 @@ class EngineArgs:
         get_field(LoadConfig, "model_loader_extra_config")
     ignore_patterns: Optional[Union[str,
                               List[str]]] = LoadConfig.ignore_patterns
-    preemption_mode: Optional[str] = SchedulerConfig.preemption_mode

-    scheduler_delay_factor: float = SchedulerConfig.delay_factor
     enable_chunked_prefill: Optional[
         bool] = SchedulerConfig.enable_chunked_prefill
     disable_chunked_mm_input: bool = SchedulerConfig.disable_chunked_mm_input
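Note on the pattern being trimmed above: every EngineArgs field mirrors a default defined on a config dataclass, so removing the field here is enough to retire the knob. A minimal sketch of that mirroring, with simplified stand-in classes rather than the real vLLM config types:

from dataclasses import dataclass
from typing import Optional


@dataclass
class SchedulerConfig:
    # Single source of truth for scheduler defaults (illustrative values).
    preemption_mode: Optional[str] = None
    delay_factor: float = 0.0


@dataclass
class EngineArgs:
    # CLI-facing mirror: defaults come straight from the config class,
    # so the two layers cannot drift apart.
    preemption_mode: Optional[str] = SchedulerConfig.preemption_mode
    scheduler_delay_factor: float = SchedulerConfig.delay_factor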
@@ -439,7 +437,6 @@ class EngineArgs:
         ObservabilityConfig.otlp_traces_endpoint
     collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
         ObservabilityConfig.collect_detailed_traces
-    disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
     scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
     scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls

@@ -561,14 +558,6 @@ class EngineArgs:
                                  **model_kwargs["enable_prompt_embeds"])
         model_group.add_argument("--served-model-name",
                                  **model_kwargs["served_model_name"])
-        # This one is a special case because it is the
-        # opposite of ModelConfig.use_async_output_proc
-        model_group.add_argument(
-            "--disable-async-output-proc",
-            action="store_true",
-            default=EngineArgs.disable_async_output_proc,
-            help="Disable async output processing. This may result in "
-            "lower performance.")
         model_group.add_argument("--config-format",
                                  **model_kwargs["config_format"])
         # This one is a special case because it can bool
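The comment in the deleted block above points at a small argparse idiom: the CLI switch is the negation of the underlying config field, so it is registered as a plain store_true flag and flipped back when the config is built. A hedged sketch of that idiom, using a local default instead of the real ModelConfig:

import argparse

# Illustrative default; in the real code this lives on ModelConfig.
use_async_output_proc_default = True

parser = argparse.ArgumentParser()
parser.add_argument(
    "--disable-async-output-proc",
    action="store_true",
    default=not use_async_output_proc_default,
    help="Disable async output processing. This may result in "
    "lower performance.")

args = parser.parse_args([])
# Invert the flag again when populating the config object.
use_async_output_proc = not args.disable_async_output_proc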
@@ -897,10 +886,6 @@ class EngineArgs:
             **scheduler_kwargs["long_prefill_token_threshold"])
         scheduler_group.add_argument("--num-lookahead-slots",
                                      **scheduler_kwargs["num_lookahead_slots"])
-        scheduler_group.add_argument("--scheduler-delay-factor",
-                                     **scheduler_kwargs["delay_factor"])
-        scheduler_group.add_argument("--preemption-mode",
-                                     **scheduler_kwargs["preemption_mode"])
         # multi-step scheduling has been removed; corresponding arguments
         # are no longer supported.
         scheduler_group.add_argument("--scheduling-policy",
@@ -1029,7 +1014,6 @@ class EngineArgs:
             interleave_mm_strings=self.interleave_mm_strings,
             media_io_kwargs=self.media_io_kwargs,
             skip_mm_profiling=self.skip_mm_profiling,
-            use_async_output_proc=not self.disable_async_output_proc,
             config_format=self.config_format,
             mm_processor_kwargs=self.mm_processor_kwargs,
             mm_processor_cache_gb=self.mm_processor_cache_gb,
@@ -1395,11 +1379,9 @@ class EngineArgs:
             max_model_len=model_config.max_model_len,
             cuda_graph_sizes=self.cuda_graph_sizes,
             num_lookahead_slots=num_lookahead_slots,
-            delay_factor=self.scheduler_delay_factor,
             enable_chunked_prefill=self.enable_chunked_prefill,
             disable_chunked_mm_input=self.disable_chunked_mm_input,
             is_multimodal_model=model_config.is_multimodal_model,
-            preemption_mode=self.preemption_mode,
             send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
                              and parallel_config.use_ray),
             policy=self.scheduling_policy,
@@ -1492,22 +1474,6 @@ class EngineArgs:
                                recommend_to_remove=False)
             return False

-        if self.preemption_mode != SchedulerConfig.preemption_mode:
-            _raise_or_fallback(feature_name="--preemption-mode",
-                               recommend_to_remove=True)
-            return False
-
-        if (self.disable_async_output_proc
-                != EngineArgs.disable_async_output_proc):
-            _raise_or_fallback(feature_name="--disable-async-output-proc",
-                               recommend_to_remove=True)
-            return False
-
-        if self.scheduler_delay_factor != SchedulerConfig.delay_factor:
-            _raise_or_fallback(feature_name="--scheduler-delay-factor",
-                               recommend_to_remove=True)
-            return False
-
         # No Mamba or Encoder-Decoder so far.
         if not model_config.is_v1_compatible:
             _raise_or_fallback(feature_name=model_config.architectures,
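The final hunk drops the V0-fallback guards: each deprecated argument was compared against its config default, and a non-default value called _raise_or_fallback with a recommendation to remove the flag. A rough sketch of that guard shape, with a stand-in helper since the real _raise_or_fallback is defined elsewhere in arg_utils.py:

def _raise_or_fallback(feature_name: str, recommend_to_remove: bool) -> None:
    # Stand-in: the real helper either raises or logs and falls back to V0.
    msg = f"{feature_name} is not supported by the V1 engine."
    if recommend_to_remove:
        msg += " Remove it from your arguments."
    raise NotImplementedError(msg)


def supports_v1(preemption_mode, default_preemption_mode=None) -> bool:
    # Shape of one removed check: any non-default value for a retired knob
    # blocks V1 and recommends dropping the flag.
    if preemption_mode != default_preemption_mode:
        _raise_or_fallback(feature_name="--preemption-mode",
                           recommend_to_remove=True)
        return False
    return True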