[V1][Metrics] Add per-request prompt/generation_tokens histograms (#12516)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-01-28 22:07:22 +00:00
committed by GitHub
parent f26d790718
commit c386c43ca3
5 changed files with 103 additions and 15 deletions

View File

@@ -202,6 +202,12 @@ EXPECTED_METRICS_V1 = [
"vllm:num_requests_waiting",
"vllm:prompt_tokens_total",
"vllm:generation_tokens_total",
"vllm:request_prompt_tokens_sum",
"vllm:request_prompt_tokens_bucket",
"vllm:request_prompt_tokens_count",
"vllm:request_generation_tokens_sum",
"vllm:request_generation_tokens_bucket",
"vllm:request_generation_tokens_count",
]