[openai api] log exception in exception handler (1/N) (#31164)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2026-03-06 00:00:12 +08:00
committed by GitHub
parent 612e7729c2
commit 176c799f4c
37 changed files with 908 additions and 1187 deletions

View File

@@ -11,7 +11,7 @@ from contextlib import asynccontextmanager
from http import HTTPStatus
import pydantic
from fastapi import FastAPI, HTTPException, Request
from fastapi import FastAPI, HTTPException, Request, Response
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from starlette.concurrency import iterate_in_threadpool
@@ -20,11 +20,13 @@ from starlette.types import ASGIApp, Message, Receive, Scope, Send
from vllm import envs
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.launcher import terminate_if_errored
from vllm.entrypoints.openai.engine.protocol import ErrorInfo, ErrorResponse
from vllm.entrypoints.utils import sanitize_message
from vllm.entrypoints.utils import create_error_response, sanitize_message
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
from vllm.utils.gc_utils import freeze_gc_heap
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
logger = init_logger("vllm.entrypoints.openai.server_utils")
@@ -309,7 +311,69 @@ async def log_response(request: Request, call_next):
return response
async def http_exception_handler(_: Request, exc: HTTPException):
async def engine_error_handler(
req: Request, exc: EngineDeadError | EngineGenerateError
):
"""
VLLM V1 AsyncLLM catches exceptions and returns
only two types: EngineGenerateError and EngineDeadError.
EngineGenerateError is raised by the per request generate()
method. This error could be request specific (and therefore
recoverable - e.g. if there is an error in input processing).
EngineDeadError is raised by the background output_handler
method. This error is global and therefore not recoverable.
We register these @app.exception_handlers to return nice
responses to the end user if they occur and shut down if needed.
See https://fastapi.tiangolo.com/tutorial/handling-errors/
for more details on how exception handlers work.
If an exception is encountered in a StreamingResponse
generator, the exception is not raised, since we already sent
a 200 status. Rather, we send an error message as the next chunk.
Since the exception is not raised, this means that the server
will not automatically shut down. Instead, we use the watchdog
background task for check for errored state.
"""
if req.app.state.args.log_error_stack:
logger.exception(
"Engine Exception caught. Request id: %s",
req.state.request_metadata.request_id
if hasattr(req.state, "request_metadata")
else None,
)
terminate_if_errored(
server=req.app.state.server,
engine=req.app.state.engine_client,
)
return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
async def exception_handler(req: Request, exc: Exception):
if req.app.state.args.log_error_stack:
logger.exception(
"Exception caught. Request id: %s",
req.state.request_metadata.request_id
if hasattr(req.state, "request_metadata")
else None,
)
err = create_error_response(exc)
return JSONResponse(err.model_dump(), status_code=err.error.code)
async def http_exception_handler(req: Request, exc: HTTPException):
if req.app.state.args.log_error_stack:
logger.exception(
"HTTPException caught. Request id: %s",
req.state.request_metadata.request_id
if hasattr(req.state, "request_metadata")
else None,
)
err = ErrorResponse(
error=ErrorInfo(
message=sanitize_message(exc.detail),
@@ -320,7 +384,15 @@ async def http_exception_handler(_: Request, exc: HTTPException):
return JSONResponse(err.model_dump(), status_code=exc.status_code)
async def validation_exception_handler(_: Request, exc: RequestValidationError):
async def validation_exception_handler(req: Request, exc: RequestValidationError):
if req.app.state.args.log_error_stack:
logger.exception(
"RequestValidationError caught. Request id: %s",
req.state.request_metadata.request_id
if hasattr(req.state, "request_metadata")
else None,
)
param = None
errors = exc.errors()
for error in errors: