[V1][Metrics] Remove metrics that were deprecated in 0.8 (#18837)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-05-28 19:54:12 +01:00
committed by GitHub
parent c68b5c63eb
commit 0e98964e94
6 changed files with 1 addition and 156 deletions

View File

@@ -1680,9 +1680,6 @@ class LLMEngine:
time_inference_requests: List[float] = []
time_prefill_requests: List[float] = []
time_decode_requests: List[float] = []
time_in_queue_requests: List[float] = []
model_forward_time_requests: List[float] = []
model_execute_time_requests: List[float] = []
# Metadata
num_prompt_tokens_requests: List[int] = []
num_generation_tokens_requests: List[int] = []
@@ -1790,15 +1787,6 @@ class LLMEngine:
now - seq_group.metrics.first_token_time)
time_inference_requests.append(
now - seq_group.metrics.first_scheduled_time)
if seq_group.metrics.time_in_queue is not None:
time_in_queue_requests.append(
seq_group.metrics.time_in_queue)
if seq_group.metrics.model_forward_time is not None:
model_forward_time_requests.append(
seq_group.metrics.model_forward_time)
if seq_group.metrics.model_execute_time is not None:
model_execute_time_requests.append(
seq_group.metrics.model_execute_time * 1000)
# Metadata
num_prompt_tokens_requests.append(
len(seq_group.prompt_token_ids))
@@ -1867,9 +1855,6 @@ class LLMEngine:
time_inference_requests=time_inference_requests,
time_prefill_requests=time_prefill_requests,
time_decode_requests=time_decode_requests,
time_in_queue_requests=time_in_queue_requests,
model_forward_time_requests=model_forward_time_requests,
model_execute_time_requests=model_execute_time_requests,
# Metadata
num_prompt_tokens_requests=num_prompt_tokens_requests,
num_generation_tokens_requests=num_generation_tokens_requests,