[V0 Deprecation] Remove async_output_proc, preemption mode, delay factor (#25334)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 08:52:32 -07:00
committed by GitHub
parent 26e673fe93
commit 0ff8ebb2d7
15 changed files with 12 additions and 210 deletions

View File

@@ -409,9 +409,7 @@ class EngineArgs:
get_field(LoadConfig, "model_loader_extra_config")
ignore_patterns: Optional[Union[str,
List[str]]] = LoadConfig.ignore_patterns
preemption_mode: Optional[str] = SchedulerConfig.preemption_mode
scheduler_delay_factor: float = SchedulerConfig.delay_factor
enable_chunked_prefill: Optional[
bool] = SchedulerConfig.enable_chunked_prefill
disable_chunked_mm_input: bool = SchedulerConfig.disable_chunked_mm_input
@@ -439,7 +437,6 @@ class EngineArgs:
ObservabilityConfig.otlp_traces_endpoint
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
ObservabilityConfig.collect_detailed_traces
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
@@ -561,14 +558,6 @@ class EngineArgs:
**model_kwargs["enable_prompt_embeds"])
model_group.add_argument("--served-model-name",
**model_kwargs["served_model_name"])
# This one is a special case because it is the
# opposite of ModelConfig.use_async_output_proc
model_group.add_argument(
"--disable-async-output-proc",
action="store_true",
default=EngineArgs.disable_async_output_proc,
help="Disable async output processing. This may result in "
"lower performance.")
model_group.add_argument("--config-format",
**model_kwargs["config_format"])
# This one is a special case because it can bool
@@ -897,10 +886,6 @@ class EngineArgs:
**scheduler_kwargs["long_prefill_token_threshold"])
scheduler_group.add_argument("--num-lookahead-slots",
**scheduler_kwargs["num_lookahead_slots"])
scheduler_group.add_argument("--scheduler-delay-factor",
**scheduler_kwargs["delay_factor"])
scheduler_group.add_argument("--preemption-mode",
**scheduler_kwargs["preemption_mode"])
# multi-step scheduling has been removed; corresponding arguments
# are no longer supported.
scheduler_group.add_argument("--scheduling-policy",
@@ -1029,7 +1014,6 @@ class EngineArgs:
interleave_mm_strings=self.interleave_mm_strings,
media_io_kwargs=self.media_io_kwargs,
skip_mm_profiling=self.skip_mm_profiling,
use_async_output_proc=not self.disable_async_output_proc,
config_format=self.config_format,
mm_processor_kwargs=self.mm_processor_kwargs,
mm_processor_cache_gb=self.mm_processor_cache_gb,
@@ -1395,11 +1379,9 @@ class EngineArgs:
max_model_len=model_config.max_model_len,
cuda_graph_sizes=self.cuda_graph_sizes,
num_lookahead_slots=num_lookahead_slots,
delay_factor=self.scheduler_delay_factor,
enable_chunked_prefill=self.enable_chunked_prefill,
disable_chunked_mm_input=self.disable_chunked_mm_input,
is_multimodal_model=model_config.is_multimodal_model,
preemption_mode=self.preemption_mode,
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
and parallel_config.use_ray),
policy=self.scheduling_policy,
@@ -1492,22 +1474,6 @@ class EngineArgs:
recommend_to_remove=False)
return False
if self.preemption_mode != SchedulerConfig.preemption_mode:
_raise_or_fallback(feature_name="--preemption-mode",
recommend_to_remove=True)
return False
if (self.disable_async_output_proc
!= EngineArgs.disable_async_output_proc):
_raise_or_fallback(feature_name="--disable-async-output-proc",
recommend_to_remove=True)
return False
if self.scheduler_delay_factor != SchedulerConfig.delay_factor:
_raise_or_fallback(feature_name="--scheduler-delay-factor",
recommend_to_remove=True)
return False
# No Mamba or Encoder-Decoder so far.
if not model_config.is_v1_compatible:
_raise_or_fallback(feature_name=model_config.architectures,