[3/N] Refactor scheduler for chunked prefill scheduling (#3550)
This commit is contained in:
@@ -728,7 +728,7 @@ class LLMEngine:
|
||||
time_per_output_tokens = []
|
||||
time_e2e_requests = []
|
||||
if scheduler_outputs is not None:
|
||||
- prompt_run = scheduler_outputs.prompt_run
|
||||
+ prompt_run = scheduler_outputs.num_prefill_groups > 0
|
||||
|
||||
# Number of Tokens.
|
||||
if prompt_run:
|
||||
|
||||
Reference in New Issue
Block a user