[Metrics] Deprecate TPOT in favor of ITL (#24110)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-09-02 19:10:10 +01:00
committed by GitHub
parent 9480ae24e3
commit 2417798471
7 changed files with 64 additions and 26 deletions

View File

@@ -1414,7 +1414,7 @@ class LLMEngine:
num_generation_tokens_iter = 0
num_tokens_iter = 0
time_to_first_tokens_iter: List[float] = []
-time_per_output_tokens_iter: List[float] = []
+inter_token_latencies_iter: List[float] = []
num_preemption_iter = (0 if scheduler_outputs is None else
scheduler_outputs.preempted)
@@ -1498,9 +1498,9 @@ class LLMEngine:
num_generation_tokens_from_prefill_groups += (
seq_group.num_seqs())
else:
-# TPOTs.
+# ITLs
latency = seq_group.get_last_token_latency()
-time_per_output_tokens_iter.append(latency)
+inter_token_latencies_iter.append(latency)
if seq_group.state.current_step == 0:
# For async_output_proc, the do_log_stats()
# is called following init_multi_step(), which
@@ -1582,7 +1582,7 @@ class LLMEngine:
num_generation_tokens_iter=num_generation_tokens_iter,
num_tokens_iter=num_tokens_iter,
time_to_first_tokens_iter=time_to_first_tokens_iter,
-time_per_output_tokens_iter=time_per_output_tokens_iter,
+inter_token_latencies_iter=inter_token_latencies_iter,
num_preemption_iter=num_preemption_iter,
# Request stats