[V1][Metrics] Add per-request prompt/generation_tokens histograms (#12516)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-01-28 22:07:22 +00:00
committed by GitHub
parent f26d790718
commit c386c43ca3
5 changed files with 103 additions and 15 deletions

View File

@@ -53,8 +53,7 @@ class AsyncLLM(EngineClient):
self.log_stats = log_stats
self.stat_loggers: List[StatLoggerBase] = [
LoggingStatLogger(),
-PrometheusStatLogger(labels=dict(
-model_name=self.model_config.served_model_name)),
+PrometheusStatLogger(vllm_config.model_config),
]
# Tokenizer (+ ensure liveness if running in another process).