[Misc] Add metrics for request queue time, forward time, and execute time (#9659)

This commit is contained in:
科英
2024-10-30 01:32:56 +08:00
committed by GitHub
parent 622b7ab955
commit 74fc2d77ae
4 changed files with 70 additions and 15 deletions

View File

@@ -1645,6 +1645,9 @@ class LLMEngine:
# Request stats
# Latency
# One sample per request is appended to these lists further down, read from
# seq_group.metrics. time_e2e_requests is always appended to; the other
# three only receive a sample when the corresponding metric is not None.
time_e2e_requests: List[float] = []
time_in_queue_requests: List[float] = []
model_forward_time_requests: List[float] = []
# NOTE(review): samples for this list are multiplied by 1000 where they are
# appended, unlike the other latency lists -- confirm the intended unit.
model_execute_time_requests: List[float] = []
# Metadata
num_prompt_tokens_requests: List[int] = []
num_generation_tokens_requests: List[int] = []
@@ -1738,6 +1741,15 @@ class LLMEngine:
# Latency timings
# End-to-end latency: difference between `now` and the arrival time
# recorded on the sequence group's metrics.
time_e2e_requests.append(now -
seq_group.metrics.arrival_time)
# The three timings below may be None on the metrics object; when a value
# is None, no sample is added to that list for this request.
if seq_group.metrics.time_in_queue is not None:
time_in_queue_requests.append(
seq_group.metrics.time_in_queue)
if seq_group.metrics.model_forward_time is not None:
model_forward_time_requests.append(
seq_group.metrics.model_forward_time)
# NOTE(review): only model_execute_time is scaled by 1000 here; queue and
# forward times are appended unscaled. Presumably this converts seconds to
# milliseconds so it matches model_forward_time -- confirm the units at the
# places where these metrics are recorded.
if seq_group.metrics.model_execute_time is not None:
model_execute_time_requests.append(
seq_group.metrics.model_execute_time * 1000)
# Metadata
# Prompt length is counted in token ids.
num_prompt_tokens_requests.append(
len(seq_group.prompt_token_ids))
@@ -1795,6 +1807,9 @@ class LLMEngine:
# Request stats
# Latency
time_e2e_requests=time_e2e_requests,
time_in_queue_requests=time_in_queue_requests,
model_forward_time_requests=model_forward_time_requests,
model_execute_time_requests=model_execute_time_requests,
# Metadata
num_prompt_tokens_requests=num_prompt_tokens_requests,
num_generation_tokens_requests=num_generation_tokens_requests,