[V1][Metrics] Add GPU prefix cache hit rate % gauge (#12592)

This commit is contained in:
Cody Yu
2025-02-11 00:27:25 -08:00
committed by GitHub
parent fc6485d277
commit 41c5dd45b9
7 changed files with 174 additions and 5 deletions

View File

@@ -203,6 +203,8 @@ EXPECTED_METRICS_V1 = [
"vllm:num_requests_running",
"vllm:num_requests_waiting",
"vllm:gpu_cache_usage_perc",
"vllm:gpu_prefix_cache_queries",
"vllm:gpu_prefix_cache_hits",
"vllm:prompt_tokens_total",
"vllm:generation_tokens_total",
"vllm:request_success_total",