Re-enable the 80 char line width limit (#3305)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from vllm.logger import init_logger
|
||||
from prometheus_client import Counter, Gauge, Histogram, Info, REGISTRY, disable_created_metrics
|
||||
from prometheus_client import (Counter, Gauge, Histogram, Info, REGISTRY,
|
||||
disable_created_metrics)
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
@@ -177,10 +178,12 @@ class StatLogger:
|
||||
def _log_prometheus_interval(self, prompt_throughput: float,
|
||||
generation_throughput: float) -> None:
|
||||
# Logs metrics to prometheus that are computed every logging_interval.
|
||||
# Support legacy gauge metrics that make throughput calculations on the vLLM side.
|
||||
# Moving forward, we should use counters like counter_prompt_tokens, counter_generation_tokens
|
||||
# Which log raw data and calculate summaries using rate() on the grafana/prometheus side.
|
||||
# See https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666
|
||||
# Support legacy gauge metrics that make throughput calculations on
|
||||
# the vLLM side. Moving forward, we should use counters like
|
||||
# counter_prompt_tokens, counter_generation_tokens
|
||||
# Which log raw data and calculate summaries using rate() on the
|
||||
# grafana/prometheus side. See
|
||||
# https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666
|
||||
self.metrics.gauge_avg_prompt_throughput.labels(
|
||||
**self.labels).set(prompt_throughput)
|
||||
self.metrics.gauge_avg_generation_throughput.labels(
|
||||
@@ -188,7 +191,7 @@ class StatLogger:
|
||||
|
||||
def log(self, stats: Stats) -> None:
|
||||
"""Called by LLMEngine.
|
||||
Logs to prometheus and tracked stats every iteration.
|
||||
Logs to prometheus and tracked stats every iteration.
|
||||
Logs to Stdout every self.local_interval seconds."""
|
||||
|
||||
# Log to prometheus.
|
||||
@@ -200,8 +203,8 @@ class StatLogger:
|
||||
|
||||
# Log locally every local_interval seconds.
|
||||
if self._local_interval_elapsed(stats.now):
|
||||
|
||||
# Compute summary metrics for tracked stats (and log them to promethus if applicable).
|
||||
# Compute summary metrics for tracked stats (and log them
|
||||
# to promethus if applicable).
|
||||
prompt_throughput = self._get_throughput(self.num_prompt_tokens,
|
||||
now=stats.now)
|
||||
generation_throughput = self._get_throughput(
|
||||
@@ -213,7 +216,8 @@ class StatLogger:
|
||||
# Log to stdout.
|
||||
logger.info(
|
||||
f"Avg prompt throughput: {prompt_throughput:.1f} tokens/s, "
|
||||
f"Avg generation throughput: {generation_throughput:.1f} tokens/s, "
|
||||
f"Avg generation throughput: "
|
||||
f"{generation_throughput:.1f} tokens/s, "
|
||||
f"Running: {stats.num_running} reqs, "
|
||||
f"Swapped: {stats.num_swapped} reqs, "
|
||||
f"Pending: {stats.num_waiting} reqs, "
|
||||
|
||||
Reference in New Issue
Block a user