[V0 Deprecation] Remove args for multi-step scheduling (#22779)

Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
Woosuk Kwon
2025-08-12 20:38:18 -07:00
committed by GitHub
parent d31f97cf57
commit c5830381af
2 changed files with 1 addition and 27 deletions

View File

@@ -115,12 +115,6 @@ class SchedulerConfig:
(e.g., beam search), recomputation is not currently supported. In
such a case, we use swapping instead."""
num_scheduler_steps: int = 1
"""Maximum number of forward steps per scheduler call."""
multi_step_stream_outputs: bool = True
"""If False, then multi-step will stream outputs at the end of all steps."""
send_delta_data: bool = False
"""Private API. If used, scheduler sends delta data to
workers instead of the entire data. It should be enabled only
@@ -193,16 +187,7 @@ class SchedulerConfig:
if self.max_num_batched_tokens is None:
if self.enable_chunked_prefill:
if self.num_scheduler_steps > 1:
# Multi-step Chunked-Prefill doesn't allow prompt-chunking
# for now. Have max_num_batched_tokens set to max_model_len
# so we don't reject sequences on account of a short
# max_num_batched_tokens.
self.max_num_batched_tokens = max(
self.max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)
else:
self.max_num_batched_tokens = (
DEFAULT_MAX_NUM_BATCHED_TOKENS)
self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS
else:
# If max_model_len is too short, use
# DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
@@ -293,12 +278,6 @@ class SchedulerConfig:
f"({self.num_lookahead_slots}) must be greater than or "
"equal to 0.")
if self.num_scheduler_steps < 1:
raise ValueError(
"num_scheduler_steps "
f"({self.num_scheduler_steps}) must be greater than or "
"equal to 1.")
if self.max_num_partial_prefills < 1:
raise ValueError(
f"max_num_partial_prefills ({self.max_num_partial_prefills}) "
@@ -323,7 +302,3 @@ class SchedulerConfig:
f"max_num_partial_prefills ({self.max_num_partial_prefills}).")
return self
@property
def is_multi_step(self) -> bool:
    """Report whether multi-step scheduling is configured.

    Returns:
        True when ``num_scheduler_steps`` is greater than 1, False otherwise.
    """
    configured_steps = self.num_scheduler_steps
    return configured_steps > 1