[Misc] Add metrics for request queue time, forward time, and execute time (#9659)
This commit is contained in:
@@ -1645,6 +1645,9 @@ class LLMEngine:
|
||||
# Request stats
|
||||
# Latency
|
||||
time_e2e_requests: List[float] = []
|
||||
time_in_queue_requests: List[float] = []
|
||||
model_forward_time_requests: List[float] = []
|
||||
model_execute_time_requests: List[float] = []
|
||||
# Metadata
|
||||
num_prompt_tokens_requests: List[int] = []
|
||||
num_generation_tokens_requests: List[int] = []
|
||||
@@ -1738,6 +1741,15 @@ class LLMEngine:
|
||||
# Latency timings
|
||||
time_e2e_requests.append(now -
|
||||
seq_group.metrics.arrival_time)
|
||||
if seq_group.metrics.time_in_queue is not None:
|
||||
time_in_queue_requests.append(
|
||||
seq_group.metrics.time_in_queue)
|
||||
if seq_group.metrics.model_forward_time is not None:
|
||||
model_forward_time_requests.append(
|
||||
seq_group.metrics.model_forward_time)
|
||||
if seq_group.metrics.model_execute_time is not None:
|
||||
model_execute_time_requests.append(
|
||||
seq_group.metrics.model_execute_time * 1000)
|
||||
# Metadata
|
||||
num_prompt_tokens_requests.append(
|
||||
len(seq_group.prompt_token_ids))
|
||||
@@ -1795,6 +1807,9 @@ class LLMEngine:
|
||||
# Request stats
|
||||
# Latency
|
||||
time_e2e_requests=time_e2e_requests,
|
||||
time_in_queue_requests=time_in_queue_requests,
|
||||
model_forward_time_requests=model_forward_time_requests,
|
||||
model_execute_time_requests=model_execute_time_requests,
|
||||
# Metadata
|
||||
num_prompt_tokens_requests=num_prompt_tokens_requests,
|
||||
num_generation_tokens_requests=num_generation_tokens_requests,
|
||||
|
||||
Reference in New Issue
Block a user