[Scheduler] Warning upon preemption and swapping (#4647)

Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
This commit is contained in:
SangBin Cho
2024-05-13 23:50:44 +09:00
committed by GitHub
parent 350f9e107f
commit e7c46b9527
7 changed files with 108 additions and 3 deletions

View File

@@ -737,6 +737,8 @@ class LLMEngine:
num_generation_tokens_iter = 0
time_to_first_tokens_iter: List[float] = []
time_per_output_tokens_iter: List[float] = []
num_preemption_iter = (0 if scheduler_outputs is None else
scheduler_outputs.preempted)
# Request stats
# Latency
@@ -830,7 +832,6 @@ class LLMEngine:
return Stats(
now=now,
# System stats
# Scheduler State
num_running_sys=num_running_sys,
@@ -846,6 +847,7 @@ class LLMEngine:
time_to_first_tokens_iter=time_to_first_tokens_iter,
time_per_output_tokens_iter=time_per_output_tokens_iter,
spec_decode_metrics=spec_decode_metrics,
num_preemption_iter=num_preemption_iter,
# Request stats
# Latency