[openapi server] log exception in exception handler(2/N) (#36201)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@@ -50,10 +50,7 @@ async def create_chat_completion(request: ChatCompletionRequest, raw_request: Re
|
||||
)
|
||||
handler = chat(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Chat Completions API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Chat Completions API")
|
||||
|
||||
generator = await handler.create_chat_completion(request, raw_request)
|
||||
|
||||
|
||||
@@ -49,10 +49,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
|
||||
)
|
||||
handler = completion(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Completions API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Completions API")
|
||||
|
||||
generator = await handler.create_completion(request, raw_request)
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ from http import HTTPStatus
|
||||
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorInfo,
|
||||
ErrorResponse,
|
||||
ModelCard,
|
||||
ModelList,
|
||||
@@ -18,7 +17,8 @@ from vllm.entrypoints.serve.lora.protocol import (
|
||||
LoadLoRAAdapterRequest,
|
||||
UnloadLoRAAdapterRequest,
|
||||
)
|
||||
from vllm.entrypoints.utils import sanitize_message
|
||||
from vllm.entrypoints.utils import create_error_response
|
||||
from vllm.exceptions import LoRAAdapterNotFoundError
|
||||
from vllm.logger import init_logger
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
||||
@@ -152,15 +152,15 @@ class OpenAIServingModels:
|
||||
try:
|
||||
await self.engine_client.add_lora(lora_request)
|
||||
except Exception as e:
|
||||
error_type = "BadRequestError"
|
||||
status_code = HTTPStatus.BAD_REQUEST
|
||||
if "No adapter found" in str(e):
|
||||
error_type = "NotFoundError"
|
||||
status_code = HTTPStatus.NOT_FOUND
|
||||
|
||||
return create_error_response(
|
||||
message=str(e), err_type=error_type, status_code=status_code
|
||||
)
|
||||
if str(
|
||||
LoRAAdapterNotFoundError(
|
||||
lora_request.lora_name, lora_request.lora_path
|
||||
)
|
||||
) in str(e):
|
||||
raise LoRAAdapterNotFoundError(
|
||||
lora_request.lora_name, lora_request.lora_path
|
||||
) from e
|
||||
raise
|
||||
|
||||
self.lora_requests[lora_name] = lora_request
|
||||
logger.info(
|
||||
@@ -292,17 +292,3 @@ class OpenAIServingModels:
|
||||
err_type="NotFoundError",
|
||||
status_code=HTTPStatus.NOT_FOUND,
|
||||
)
|
||||
|
||||
|
||||
def create_error_response(
|
||||
message: str,
|
||||
err_type: str = "BadRequestError",
|
||||
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
|
||||
) -> ErrorResponse:
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(
|
||||
message=sanitize_message(message),
|
||||
type=err_type,
|
||||
code=status_code.value,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -59,10 +59,7 @@ async def _convert_stream_to_sse_events(
|
||||
async def create_responses(request: ResponsesRequest, raw_request: Request):
|
||||
handler = responses(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Responses API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Responses API")
|
||||
|
||||
generator = await handler.create_responses(request, raw_request)
|
||||
|
||||
@@ -88,10 +85,7 @@ async def retrieve_responses(
|
||||
):
|
||||
handler = responses(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Responses API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Responses API")
|
||||
|
||||
response = await handler.retrieve_responses(
|
||||
response_id,
|
||||
@@ -115,10 +109,7 @@ async def retrieve_responses(
|
||||
async def cancel_responses(response_id: str, raw_request: Request):
|
||||
handler = responses(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Responses API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Responses API")
|
||||
|
||||
response = await handler.cancel_responses(response_id)
|
||||
|
||||
|
||||
@@ -65,10 +65,7 @@ async def create_transcriptions(
|
||||
):
|
||||
handler = transcription(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Transcriptions API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Transcriptions API")
|
||||
|
||||
audio_data = await request.file.read()
|
||||
|
||||
@@ -101,10 +98,7 @@ async def create_translations(
|
||||
):
|
||||
handler = translation(raw_request)
|
||||
if handler is None:
|
||||
base_server = raw_request.app.state.openai_serving_tokenization
|
||||
return base_server.create_error_response(
|
||||
message="The model does not support Translations API"
|
||||
)
|
||||
raise NotImplementedError("The model does not support Translations API")
|
||||
|
||||
audio_data = await request.file.read()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user