[V1][Metrics] Add GPU cache usage % gauge (#12561)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-01-30 02:31:01 +00:00
committed by GitHub
parent 1c1bb0bbf2
commit f17f1d4608
5 changed files with 18 additions and 2 deletions

View File

@@ -200,6 +200,7 @@ EXPECTED_METRICS = [
EXPECTED_METRICS_V1 = [
"vllm:num_requests_running",
"vllm:num_requests_waiting",
"vllm:gpu_cache_usage_perc",
"vllm:prompt_tokens_total",
"vllm:generation_tokens_total",
"vllm:request_prompt_tokens_sum",