[V1][Metrics] Add GPU prefix cache hit rate % gauge (#12592)
This commit is contained in:
@@ -203,6 +203,8 @@ EXPECTED_METRICS_V1 = [
|
||||
"vllm:num_requests_running",
|
||||
"vllm:num_requests_waiting",
|
||||
"vllm:gpu_cache_usage_perc",
|
||||
"vllm:gpu_prefix_cache_queries",
|
||||
"vllm:gpu_prefix_cache_hits",
|
||||
"vllm:prompt_tokens_total",
|
||||
"vllm:generation_tokens_total",
|
||||
"vllm:request_success_total",
|
||||
|
||||
Reference in New Issue
Block a user