[V1][Metrics] Handle preemptions (#13169)

This commit is contained in:
Mark McLoughlin
2025-02-27 04:04:59 +00:00
committed by GitHub
parent 378b3ef6f8
commit cd711c48b2
5 changed files with 48 additions and 19 deletions

View File

@@ -227,6 +227,7 @@ EXPECTED_METRICS_V1 = [
"vllm:gpu_cache_usage_perc",
"vllm:gpu_prefix_cache_queries",
"vllm:gpu_prefix_cache_hits",
"vllm:num_preemptions_total",
"vllm:prompt_tokens_total",
"vllm:generation_tokens_total",
"vllm:iteration_tokens_total",