[openai api] log exception in exception handler (1/N) (#31164)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
2026-03-06 00:00:12 +08:00
parent 612e7729c2
commit 176c799f4c
37 changed files with 908 additions and 1187 deletions
--- a/tests/entrypoints/openai/responses/test_errors.py
+++ b/tests/entrypoints/openai/responses/test_errors.py
@@ -6,7 +6,6 @@ from unittest.mock import MagicMock

 import pytest

-from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.engine.serving import GenerationError, OpenAIServing


@@ -38,32 +37,6 @@ async def test_raise_if_error_raises_generation_error():
    serving._raise_if_error(None, "test-request-id")  # should not raise


-@pytest.mark.asyncio
-async def test_convert_generation_error_to_response():
-    """test _convert_generation_error_to_response creates proper ErrorResponse"""
-    mock_engine = MagicMock()
-    mock_engine.model_config = MagicMock()
-    mock_engine.model_config.max_model_len = 100
-    mock_models = MagicMock()
-
-    serving = OpenAIServing(
-        engine_client=mock_engine,
-        models=mock_models,
-        request_logger=None,
-    )
-
-    # create a GenerationError
-    gen_error = GenerationError("Internal server error")
-
-    # convert to ErrorResponse
-    error_response = serving._convert_generation_error_to_response(gen_error)
-
-    assert isinstance(error_response, ErrorResponse)
-    assert error_response.error.type == "InternalServerError"
-    assert error_response.error.message == "Internal server error"
-    assert error_response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
-
-
@pytest.mark.asyncio
 async def test_convert_generation_error_to_streaming_response():
    """test _convert_generation_error_to_streaming_response output"""
--- a/tests/entrypoints/openai/responses/test_harmony.py
+++ b/tests/entrypoints/openai/responses/test_harmony.py
@@ -13,7 +13,7 @@ from typing import Any
 import pytest
 import pytest_asyncio
 import requests
-from openai import BadRequestError, NotFoundError, OpenAI
+from openai import InternalServerError, NotFoundError, OpenAI
 from openai_harmony import Message

 from ....utils import RemoteOpenAIServer
@@ -698,7 +698,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
 async def test_function_calling_required(client: OpenAI, model_name: str):
    tools = [GET_WEATHER_SCHEMA]

-    with pytest.raises(BadRequestError):
+    with pytest.raises(InternalServerError):
        await client.responses.create(
            model=model_name,
            input="What's the weather like in Paris today?",
--- a/tests/entrypoints/openai/test_chat_error.py
+++ b/tests/entrypoints/openai/test_chat_error.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from dataclasses import dataclass, field
-from http import HTTPStatus
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

@@ -11,7 +10,7 @@ import pytest
 from vllm.config.multimodal import MultiModalConfig
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
-from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.openai.engine.protocol import GenerationError
 from vllm.entrypoints.openai.models.protocol import BaseModelPath
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
 from vllm.outputs import CompletionOutput, RequestOutput
@@ -145,12 +144,8 @@ async def test_chat_error_non_stream():
        stream=False,
    )

-    response = await serving_chat.create_chat_completion(request)
-
-    assert isinstance(response, ErrorResponse)
-    assert response.error.type == "InternalServerError"
-    assert response.error.message == "Internal server error"
-    assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
+    with pytest.raises(GenerationError):
+        await serving_chat.create_chat_completion(request)


@pytest.mark.asyncio
--- a/tests/entrypoints/openai/test_completion_error.py
+++ b/tests/entrypoints/openai/test_completion_error.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from dataclasses import dataclass, field
-from http import HTTPStatus
 from typing import Any
 from unittest.mock import MagicMock

@@ -11,7 +10,7 @@ import pytest
 from vllm.config.multimodal import MultiModalConfig
 from vllm.entrypoints.openai.completion.protocol import CompletionRequest
 from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
-from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.openai.engine.protocol import GenerationError
 from vllm.entrypoints.openai.models.protocol import BaseModelPath
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
 from vllm.outputs import CompletionOutput, RequestOutput
@@ -131,12 +130,8 @@ async def test_completion_error_non_stream():
        stream=False,
    )

-    response = await serving_completion.create_completion(request)
-
-    assert isinstance(response, ErrorResponse)
-    assert response.error.type == "InternalServerError"
-    assert response.error.message == "Internal server error"
-    assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
+    with pytest.raises(GenerationError):
+        await serving_completion.create_completion(request)


@pytest.mark.asyncio
--- a/tests/entrypoints/openai/test_openai_schema.py
+++ b/tests/entrypoints/openai/test_openai_schema.py
@@ -1,12 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
+from http import HTTPStatus
 from typing import Final

 import pytest
 import schemathesis
+from httpx import URL
 from hypothesis import settings
 from schemathesis import GenerationConfig
+from schemathesis.checks import not_a_server_error
+from schemathesis.internal.checks import CheckContext
+from schemathesis.models import Case
+from schemathesis.transports.responses import GenericResponse

 from ...utils import RemoteOpenAIServer

@@ -127,10 +133,25 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
    return strategy.filter(no_invalid_types)


+def customized_not_a_server_error(
+    ctx: CheckContext, response: GenericResponse, case: Case
+) -> bool | None:
+    try:
+        return not_a_server_error(ctx, response, case)
+    except Exception:
+        if (
+            URL(response.request.url).path
+            in ["/v1/chat/completions/render", "/v1/chat/completions"]
+            and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value
+        ):
+            return True
+        raise
+
+
@schema.parametrize()
@schema.override(headers={"Content-Type": "application/json"})
@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
-def test_openapi_stateless(case: schemathesis.Case):
+def test_openapi_stateless(case: Case):
    key = (
        case.operation.method.upper(),
        case.operation.path,
@@ -155,4 +176,9 @@ def test_openapi_stateless(case: schemathesis.Case):
    }.get(key, DEFAULT_TIMEOUT_SECONDS)

    # No need to verify SSL certificate for localhost
-    case.call_and_validate(verify=False, timeout=timeout)
+    case.call_and_validate(
+        verify=False,
+        timeout=timeout,
+        additional_checks=(customized_not_a_server_error,),
+        excluded_checks=(not_a_server_error,),
+    )
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
 )
 from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
 from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
+from vllm.exceptions import VLLMValidationError
 from vllm.inputs import TokensPrompt
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.renderers.hf import HfRenderer
@@ -818,9 +819,8 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
        max_tokens=10,
    )

-    resp = await serving_chat.create_chat_completion(req)
-    assert isinstance(resp, ErrorResponse)
-    assert "context length is only" in resp.error.message
+    with pytest.raises(VLLMValidationError):
+        await serving_chat.create_chat_completion(req)


@pytest.mark.asyncio
@@ -860,9 +860,8 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
        max_tokens=1,
    )

-    resp = await serving_chat.create_chat_completion(req)
-    assert isinstance(resp, ErrorResponse)
-    assert "context length is only" in resp.error.message
+    with pytest.raises(VLLMValidationError):
+        await serving_chat.create_chat_completion(req)


@pytest.mark.asyncio