[3/N] Refactor scheduler for chunked prefill scheduling (#3550)

This commit is contained in:
SangBin Cho
2024-04-04 06:13:49 +09:00
committed by GitHub
parent c64cf38673
commit 3dcb3e8b98
5 changed files with 1021 additions and 256 deletions

View File

@@ -728,7 +728,7 @@ class LLMEngine:
time_per_output_tokens = []
time_e2e_requests = []
if scheduler_outputs is not None:
-prompt_run = scheduler_outputs.prompt_run
+prompt_run = scheduler_outputs.num_prefill_groups > 0
# Number of Tokens.
if prompt_run: