Add Production Metrics in Prometheus format (#1890)

This commit is contained in:
Simon Mo
2023-12-02 16:37:44 -08:00
committed by GitHub
parent 5f09cbdb63
commit 5313c2cb8b
6 changed files with 89 additions and 2 deletions

View File

@@ -9,6 +9,8 @@ import time
from http import HTTPStatus
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
from aioprometheus import MetricsMiddleware
from aioprometheus.asgi.starlette import metrics
import fastapi
import uvicorn
from fastapi import Request
@@ -18,6 +20,7 @@ from fastapi.responses import JSONResponse, StreamingResponse, Response
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.metrics import add_global_metrics_labels
from vllm.entrypoints.openai.protocol import (
CompletionRequest, CompletionResponse, CompletionResponseChoice,
CompletionResponseStreamChoice, CompletionStreamResponse,
@@ -82,6 +85,10 @@ def parse_args():
return parser.parse_args()
# Install aioprometheus' MetricsMiddleware on the app to trace HTTP server
# metrics.
app.add_middleware(MetricsMiddleware)
# Expose HTTP metrics at the "/metrics" path using the `metrics` handler
# imported from aioprometheus.asgi.starlette.
app.add_route("/metrics", metrics)
def create_error_response(status_code: HTTPStatus,
message: str) -> JSONResponse:
return JSONResponse(ErrorResponse(message=message,
@@ -722,6 +729,9 @@ if __name__ == "__main__":
trust_remote_code=engine_model_config.trust_remote_code)
load_chat_template(args, tokenizer)
# Register labels for metrics
add_global_metrics_labels(model_name=engine_args.model)
uvicorn.run(app,
host=args.host,
port=args.port,