[openai api] log exception in exception handler (1/N) (#31164)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2026-03-06 00:00:12 +08:00
committed by GitHub
parent 612e7729c2
commit 176c799f4c
37 changed files with 908 additions and 1187 deletions

View File

@@ -6,7 +6,6 @@ from unittest.mock import MagicMock
import pytest
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.serving import GenerationError, OpenAIServing
@@ -38,32 +37,6 @@ async def test_raise_if_error_raises_generation_error():
serving._raise_if_error(None, "test-request-id") # should not raise
@pytest.mark.asyncio
async def test_convert_generation_error_to_response():
"""test _convert_generation_error_to_response creates proper ErrorResponse"""
mock_engine = MagicMock()
mock_engine.model_config = MagicMock()
mock_engine.model_config.max_model_len = 100
mock_models = MagicMock()
serving = OpenAIServing(
engine_client=mock_engine,
models=mock_models,
request_logger=None,
)
# create a GenerationError
gen_error = GenerationError("Internal server error")
# convert to ErrorResponse
error_response = serving._convert_generation_error_to_response(gen_error)
assert isinstance(error_response, ErrorResponse)
assert error_response.error.type == "InternalServerError"
assert error_response.error.message == "Internal server error"
assert error_response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
@pytest.mark.asyncio
async def test_convert_generation_error_to_streaming_response():
"""test _convert_generation_error_to_streaming_response output"""

View File

@@ -13,7 +13,7 @@ from typing import Any
import pytest
import pytest_asyncio
import requests
from openai import BadRequestError, NotFoundError, OpenAI
from openai import InternalServerError, NotFoundError, OpenAI
from openai_harmony import Message
from ....utils import RemoteOpenAIServer
@@ -698,7 +698,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
async def test_function_calling_required(client: OpenAI, model_name: str):
tools = [GET_WEATHER_SCHEMA]
with pytest.raises(BadRequestError):
with pytest.raises(InternalServerError):
await client.responses.create(
model=model_name,
input="What's the weather like in Paris today?",

View File

@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass, field
from http import HTTPStatus
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
@@ -11,7 +10,7 @@ import pytest
from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.protocol import GenerationError
from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.outputs import CompletionOutput, RequestOutput
@@ -145,12 +144,8 @@ async def test_chat_error_non_stream():
stream=False,
)
response = await serving_chat.create_chat_completion(request)
assert isinstance(response, ErrorResponse)
assert response.error.type == "InternalServerError"
assert response.error.message == "Internal server error"
assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
with pytest.raises(GenerationError):
await serving_chat.create_chat_completion(request)
@pytest.mark.asyncio

View File

@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass, field
from http import HTTPStatus
from typing import Any
from unittest.mock import MagicMock
@@ -11,7 +10,7 @@ import pytest
from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.protocol import GenerationError
from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.outputs import CompletionOutput, RequestOutput
@@ -131,12 +130,8 @@ async def test_completion_error_non_stream():
stream=False,
)
response = await serving_completion.create_completion(request)
assert isinstance(response, ErrorResponse)
assert response.error.type == "InternalServerError"
assert response.error.message == "Internal server error"
assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
with pytest.raises(GenerationError):
await serving_completion.create_completion(request)
@pytest.mark.asyncio

View File

@@ -1,12 +1,18 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from http import HTTPStatus
from typing import Final
import pytest
import schemathesis
from httpx import URL
from hypothesis import settings
from schemathesis import GenerationConfig
from schemathesis.checks import not_a_server_error
from schemathesis.internal.checks import CheckContext
from schemathesis.models import Case
from schemathesis.transports.responses import GenericResponse
from ...utils import RemoteOpenAIServer
@@ -127,10 +133,25 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
return strategy.filter(no_invalid_types)
def customized_not_a_server_error(
ctx: CheckContext, response: GenericResponse, case: Case
) -> bool | None:
try:
return not_a_server_error(ctx, response, case)
except Exception:
if (
URL(response.request.url).path
in ["/v1/chat/completions/render", "/v1/chat/completions"]
and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value
):
return True
raise
@schema.parametrize()
@schema.override(headers={"Content-Type": "application/json"})
@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
def test_openapi_stateless(case: schemathesis.Case):
def test_openapi_stateless(case: Case):
key = (
case.operation.method.upper(),
case.operation.path,
@@ -155,4 +176,9 @@ def test_openapi_stateless(case: schemathesis.Case):
}.get(key, DEFAULT_TIMEOUT_SECONDS)
# No need to verify SSL certificate for localhost
case.call_and_validate(verify=False, timeout=timeout)
case.call_and_validate(
verify=False,
timeout=timeout,
additional_checks=(customized_not_a_server_error,),
excluded_checks=(not_a_server_error,),
)

View File

@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
)
from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
from vllm.exceptions import VLLMValidationError
from vllm.inputs import TokensPrompt
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.renderers.hf import HfRenderer
@@ -818,9 +819,8 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
max_tokens=10,
)
resp = await serving_chat.create_chat_completion(req)
assert isinstance(resp, ErrorResponse)
assert "context length is only" in resp.error.message
with pytest.raises(VLLMValidationError):
await serving_chat.create_chat_completion(req)
@pytest.mark.asyncio
@@ -860,9 +860,8 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
max_tokens=1,
)
resp = await serving_chat.create_chat_completion(req)
assert isinstance(resp, ErrorResponse)
assert "context length is only" in resp.error.message
with pytest.raises(VLLMValidationError):
await serving_chat.create_chat_completion(req)
@pytest.mark.asyncio