[Bugfix] Add error handling for FINISHED_ERROR in OpenAIServing (#37148)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
@@ -29,11 +29,13 @@ from vllm.entrypoints.chat_utils import load_chat_template
|
||||
from vllm.entrypoints.launcher import serve_http
|
||||
from vllm.entrypoints.logger import RequestLogger
|
||||
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
|
||||
from vllm.entrypoints.openai.engine.protocol import GenerationError
|
||||
from vllm.entrypoints.openai.models.protocol import BaseModelPath
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
|
||||
from vllm.entrypoints.openai.server_utils import (
|
||||
engine_error_handler,
|
||||
exception_handler,
|
||||
generation_error_handler,
|
||||
get_uvicorn_log_config,
|
||||
http_exception_handler,
|
||||
lifespan,
|
||||
@@ -263,6 +265,7 @@ def build_app(
|
||||
app.exception_handler(RequestValidationError)(validation_exception_handler)
|
||||
app.exception_handler(EngineGenerateError)(engine_error_handler)
|
||||
app.exception_handler(EngineDeadError)(engine_error_handler)
|
||||
app.exception_handler(GenerationError)(generation_error_handler)
|
||||
app.exception_handler(Exception)(exception_handler)
|
||||
|
||||
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
|
||||
|
||||
@@ -21,7 +21,11 @@ from starlette.types import ASGIApp, Message, Receive, Scope, Send
|
||||
from vllm import envs
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.launcher import terminate_if_errored
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorInfo, ErrorResponse
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorInfo,
|
||||
ErrorResponse,
|
||||
GenerationError,
|
||||
)
|
||||
from vllm.entrypoints.utils import create_error_response, sanitize_message
|
||||
from vllm.exceptions import VLLMValidationError
|
||||
from vllm.logger import init_logger
|
||||
@@ -354,6 +358,17 @@ async def engine_error_handler(
|
||||
return JSONResponse(err.model_dump(), status_code=err.error.code)
|
||||
|
||||
|
||||
async def generation_error_handler(req: Request, exc: GenerationError):
    """Convert a GenerationError into a JSON error response.

    The exception is passed to ``create_error_response`` and the resulting
    error object is serialized as the response body. The HTTP status code
    is read from the ``error.code`` field of that object (the original
    description of this handler states this is a 500 for failures such as
    a KV cache load failure).

    This handler performs no logging itself, so a GenerationError handled
    here does not add a stack trace to the server logs.
    """
    error_response = create_error_response(exc)
    # Serialize the body first, then read the status code, from the same
    # error object so the payload and the HTTP status stay in agreement.
    payload = error_response.model_dump()
    http_status = error_response.error.code
    return JSONResponse(payload, status_code=http_status)
|
||||
|
||||
|
||||
async def exception_handler(req: Request, exc: Exception):
|
||||
if req.app.state.args.log_error_stack:
|
||||
logger.exception(
|
||||
|
||||
Reference in New Issue
Block a user