[V0 Deprecation] Remove multi-step scheduling (#22138)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
Woosuk Kwon
2025-08-12 20:18:39 -07:00
committed by GitHub
parent e18859298d
commit 71683ca6f6
37 changed files with 57 additions and 3465 deletions

View File

@@ -26,15 +26,12 @@ DEFAULT_ARGS = ["--max-model-len", "4096"]
MORE_ARGS_LIST = [
[], # Default
["--enable-chunked-prefill"], # Chunked
["--num-scheduler-steps", "8"], # MS
["--num-scheduler-steps", "8", "--multi-step-stream-outputs"] # MS+Stream
]
MAX_WAIT_SECONDS = None
if current_platform.is_tpu():
MORE_ARGS_LIST = [
[], # Default
# ["--num-scheduler-steps", "8"], # Multi-step << currently fails
]
MAX_WAIT_SECONDS = 600