[Frontend] Update OpenAI error response to upstream format (#22099)
Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>
This commit is contained in:
@@ -62,7 +62,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
|
||||
DetokenizeRequest,
|
||||
DetokenizeResponse,
|
||||
EmbeddingRequest,
|
||||
- EmbeddingResponse, ErrorResponse,
+ EmbeddingResponse, ErrorInfo,
+ ErrorResponse,
|
||||
LoadLoRAAdapterRequest,
|
||||
PoolingRequest, PoolingResponse,
|
||||
RerankRequest, RerankResponse,
|
||||
@@ -506,7 +507,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, TokenizeResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -540,7 +541,7 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, DetokenizeResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -556,7 +557,7 @@ def maybe_register_tokenizer_info_endpoint(args):
|
||||
"""Get comprehensive tokenizer information."""
|
||||
result = await tokenization(raw_request).get_tokenizer_info()
|
||||
return JSONResponse(content=result.model_dump(),
|
||||
- status_code=result.code if isinstance(
+ status_code=result.error.code if isinstance(
|
||||
result, ErrorResponse) else 200)
|
||||
|
||||
|
||||
@@ -603,7 +604,7 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, ResponsesResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
return StreamingResponse(content=generator, media_type="text/event-stream")
|
||||
@@ -620,7 +621,7 @@ async def retrieve_responses(response_id: str, raw_request: Request):
|
||||
|
||||
if isinstance(response, ErrorResponse):
|
||||
return JSONResponse(content=response.model_dump(),
|
||||
- status_code=response.code)
+ status_code=response.error.code)
|
||||
return JSONResponse(content=response.model_dump())
|
||||
|
||||
|
||||
@@ -635,7 +636,7 @@ async def cancel_responses(response_id: str, raw_request: Request):
|
||||
|
||||
if isinstance(response, ErrorResponse):
|
||||
return JSONResponse(content=response.model_dump(),
|
||||
- status_code=response.code)
+ status_code=response.error.code)
|
||||
return JSONResponse(content=response.model_dump())
|
||||
|
||||
|
||||
@@ -670,7 +671,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
|
||||
elif isinstance(generator, ChatCompletionResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
@@ -715,7 +716,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, CompletionResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -744,7 +745,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, EmbeddingResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -772,7 +773,7 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
|
||||
generator = await handler.create_pooling(request, raw_request)
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, PoolingResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -792,7 +793,7 @@ async def create_classify(request: ClassificationRequest,
|
||||
generator = await handler.create_classify(request, raw_request)
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
|
||||
elif isinstance(generator, ClassificationResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
@@ -821,7 +822,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):
|
||||
generator = await handler.create_score(request, raw_request)
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, ScoreResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -881,7 +882,7 @@ async def create_transcriptions(raw_request: Request,
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
|
||||
elif isinstance(generator, TranscriptionResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
@@ -922,7 +923,7 @@ async def create_translations(request: Annotated[TranslationRequest,
|
||||
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
|
||||
elif isinstance(generator, TranslationResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
@@ -950,7 +951,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
|
||||
generator = await handler.do_rerank(request, raw_request)
|
||||
if isinstance(generator, ErrorResponse):
|
||||
return JSONResponse(content=generator.model_dump(),
|
||||
- status_code=generator.code)
+ status_code=generator.error.code)
|
||||
elif isinstance(generator, RerankResponse):
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
|
||||
@@ -1175,7 +1176,7 @@ async def invocations(raw_request: Request):
|
||||
msg = ("Cannot find suitable handler for request. "
|
||||
f"Expected one of: {type_names}")
|
||||
res = base(raw_request).create_error_response(message=msg)
|
||||
- return JSONResponse(content=res.model_dump(), status_code=res.code)
+ return JSONResponse(content=res.model_dump(), status_code=res.error.code)
|
||||
|
||||
|
||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||
@@ -1211,7 +1212,7 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
|
||||
response = await handler.load_lora_adapter(request)
|
||||
if isinstance(response, ErrorResponse):
|
||||
return JSONResponse(content=response.model_dump(),
|
||||
- status_code=response.code)
+ status_code=response.error.code)
|
||||
|
||||
return Response(status_code=200, content=response)
|
||||
|
||||
@@ -1223,7 +1224,7 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
|
||||
response = await handler.unload_lora_adapter(request)
|
||||
if isinstance(response, ErrorResponse):
|
||||
return JSONResponse(content=response.model_dump(),
|
||||
- status_code=response.code)
+ status_code=response.error.code)
|
||||
|
||||
return Response(status_code=200, content=response)
|
||||
|
||||
@@ -1502,9 +1503,10 @@ def build_app(args: Namespace) -> FastAPI:
|
||||
|
||||
@app.exception_handler(HTTPException)
|
||||
async def http_exception_handler(_: Request, exc: HTTPException):
|
||||
- err = ErrorResponse(message=exc.detail,
+ err = ErrorResponse(
+ error=ErrorInfo(message=exc.detail,
|
||||
type=HTTPStatus(exc.status_code).phrase,
|
||||
- code=exc.status_code)
+ code=exc.status_code))
|
||||
return JSONResponse(err.model_dump(), status_code=exc.status_code)
|
||||
|
||||
@app.exception_handler(RequestValidationError)
|
||||
@@ -1518,9 +1520,9 @@ def build_app(args: Namespace) -> FastAPI:
|
||||
else:
|
||||
message = exc_str
|
||||
|
||||
- err = ErrorResponse(message=message,
- type=HTTPStatus.BAD_REQUEST.phrase,
- code=HTTPStatus.BAD_REQUEST)
+ err = ErrorResponse(error=ErrorInfo(message=message,
+ type=HTTPStatus.BAD_REQUEST.phrase,
+ code=HTTPStatus.BAD_REQUEST))
|
||||
return JSONResponse(err.model_dump(),
|
||||
status_code=HTTPStatus.BAD_REQUEST)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user