Add Production Metrics in Prometheus format (#1890)

This commit is contained in:
Simon Mo
2023-12-02 16:37:44 -08:00
committed by GitHub
parent 5f09cbdb63
commit 5313c2cb8b
6 changed files with 89 additions and 2 deletions

View File

@@ -7,6 +7,7 @@ from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
SchedulerConfig)
from vllm.core.scheduler import Scheduler, SchedulerOutputs
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.metrics import record_metrics
from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray
from vllm.logger import init_logger
from vllm.outputs import RequestOutput
@@ -591,8 +592,8 @@ class LLMEngine:
else:
self.num_generation_tokens.append((now, num_batched_tokens))
elapsed_time = now - self.last_logging_time
if elapsed_time < _LOGGING_INTERVAL_SEC:
should_log = now - self.last_logging_time >= _LOGGING_INTERVAL_SEC
if not should_log:
return
# Discard the old stats.
@@ -631,6 +632,16 @@ class LLMEngine:
else:
cpu_cache_usage = 0.0
record_metrics(
avg_prompt_throughput=avg_prompt_throughput,
avg_generation_throughput=avg_generation_throughput,
scheduler_running=len(self.scheduler.running),
scheduler_swapped=len(self.scheduler.swapped),
scheduler_waiting=len(self.scheduler.waiting),
gpu_cache_usage=gpu_cache_usage,
cpu_cache_usage=cpu_cache_usage,
)
logger.info("Avg prompt throughput: "
f"{avg_prompt_throughput:.1f} tokens/s, "
"Avg generation throughput: "