[Bugfix] If chunked_prefill is disabled, end the scheduling early. (#28911)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -508,9 +508,9 @@ class Scheduler(SchedulerInterface):
     not self.scheduler_config.enable_chunked_prefill
     and num_new_tokens > token_budget
 ):
-    self.waiting.pop_request()
-    skipped_waiting_requests.prepend_request(request)
-    continue
+    # If chunked_prefill is disabled,
+    # we can stop the scheduling here.
+    break
 
 num_new_tokens = min(num_new_tokens, token_budget)
 assert num_new_tokens > 0
Reference in New Issue
Block a user