[Scheduler] Warning upon preemption and Swapping (#4647)
Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
This commit is contained in:
@@ -737,6 +737,8 @@ class LLMEngine:
|
||||
num_generation_tokens_iter = 0
|
||||
time_to_first_tokens_iter: List[float] = []
|
||||
time_per_output_tokens_iter: List[float] = []
|
||||
num_preemption_iter = (0 if scheduler_outputs is None else
|
||||
scheduler_outputs.preempted)
|
||||
|
||||
# Request stats
|
||||
# Latency
|
||||
@@ -830,7 +832,6 @@ class LLMEngine:
|
||||
|
||||
return Stats(
|
||||
now=now,
|
||||
|
||||
# System stats
|
||||
# Scheduler State
|
||||
num_running_sys=num_running_sys,
|
||||
@@ -846,6 +847,7 @@ class LLMEngine:
|
||||
time_to_first_tokens_iter=time_to_first_tokens_iter,
|
||||
time_per_output_tokens_iter=time_per_output_tokens_iter,
|
||||
spec_decode_metrics=spec_decode_metrics,
|
||||
num_preemption_iter=num_preemption_iter,
|
||||
|
||||
# Request stats
|
||||
# Latency
|
||||
|
||||
Reference in New Issue
Block a user