[openai api] log exception in exception handler (1/N) (#31164)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@@ -6,7 +6,6 @@ from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
|
||||
from vllm.entrypoints.openai.engine.serving import GenerationError, OpenAIServing
|
||||
|
||||
|
||||
@@ -38,32 +37,6 @@ async def test_raise_if_error_raises_generation_error():
|
||||
serving._raise_if_error(None, "test-request-id") # should not raise
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_generation_error_to_response():
|
||||
"""test _convert_generation_error_to_response creates proper ErrorResponse"""
|
||||
mock_engine = MagicMock()
|
||||
mock_engine.model_config = MagicMock()
|
||||
mock_engine.model_config.max_model_len = 100
|
||||
mock_models = MagicMock()
|
||||
|
||||
serving = OpenAIServing(
|
||||
engine_client=mock_engine,
|
||||
models=mock_models,
|
||||
request_logger=None,
|
||||
)
|
||||
|
||||
# create a GenerationError
|
||||
gen_error = GenerationError("Internal server error")
|
||||
|
||||
# convert to ErrorResponse
|
||||
error_response = serving._convert_generation_error_to_response(gen_error)
|
||||
|
||||
assert isinstance(error_response, ErrorResponse)
|
||||
assert error_response.error.type == "InternalServerError"
|
||||
assert error_response.error.message == "Internal server error"
|
||||
assert error_response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_generation_error_to_streaming_response():
|
||||
"""test _convert_generation_error_to_streaming_response output"""
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import Any
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
import requests
|
||||
from openai import BadRequestError, NotFoundError, OpenAI
|
||||
from openai import InternalServerError, NotFoundError, OpenAI
|
||||
from openai_harmony import Message
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
@@ -698,7 +698,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
|
||||
async def test_function_calling_required(client: OpenAI, model_name: str):
|
||||
tools = [GET_WEATHER_SCHEMA]
|
||||
|
||||
with pytest.raises(BadRequestError):
|
||||
with pytest.raises(InternalServerError):
|
||||
await client.responses.create(
|
||||
model=model_name,
|
||||
input="What's the weather like in Paris today?",
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from http import HTTPStatus
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
@@ -11,7 +10,7 @@ import pytest
|
||||
from vllm.config.multimodal import MultiModalConfig
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
|
||||
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
|
||||
from vllm.entrypoints.openai.engine.protocol import GenerationError
|
||||
from vllm.entrypoints.openai.models.protocol import BaseModelPath
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
@@ -145,12 +144,8 @@ async def test_chat_error_non_stream():
|
||||
stream=False,
|
||||
)
|
||||
|
||||
response = await serving_chat.create_chat_completion(request)
|
||||
|
||||
assert isinstance(response, ErrorResponse)
|
||||
assert response.error.type == "InternalServerError"
|
||||
assert response.error.message == "Internal server error"
|
||||
assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
|
||||
with pytest.raises(GenerationError):
|
||||
await serving_chat.create_chat_completion(request)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from http import HTTPStatus
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
@@ -11,7 +10,7 @@ import pytest
|
||||
from vllm.config.multimodal import MultiModalConfig
|
||||
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
|
||||
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
|
||||
from vllm.entrypoints.openai.engine.protocol import GenerationError
|
||||
from vllm.entrypoints.openai.models.protocol import BaseModelPath
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
@@ -131,12 +130,8 @@ async def test_completion_error_non_stream():
|
||||
stream=False,
|
||||
)
|
||||
|
||||
response = await serving_completion.create_completion(request)
|
||||
|
||||
assert isinstance(response, ErrorResponse)
|
||||
assert response.error.type == "InternalServerError"
|
||||
assert response.error.message == "Internal server error"
|
||||
assert response.error.code == HTTPStatus.INTERNAL_SERVER_ERROR
|
||||
with pytest.raises(GenerationError):
|
||||
await serving_completion.create_completion(request)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import json
|
||||
from http import HTTPStatus
|
||||
from typing import Final
|
||||
|
||||
import pytest
|
||||
import schemathesis
|
||||
from httpx import URL
|
||||
from hypothesis import settings
|
||||
from schemathesis import GenerationConfig
|
||||
from schemathesis.checks import not_a_server_error
|
||||
from schemathesis.internal.checks import CheckContext
|
||||
from schemathesis.models import Case
|
||||
from schemathesis.transports.responses import GenericResponse
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
@@ -127,10 +133,25 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
|
||||
return strategy.filter(no_invalid_types)
|
||||
|
||||
|
||||
def customized_not_a_server_error(
    ctx: CheckContext, response: GenericResponse, case: Case
) -> bool | None:
    """Run ``not_a_server_error`` but tolerate 501 on chat-completion routes.

    The stock schemathesis check is called first and its result is returned
    unchanged when it does not raise. If it raises, the failure is suppressed
    (``True`` is returned) only when the request path is one of the chat
    completion endpoints listed below AND the response status code is
    ``501 Not Implemented``. In every other case the original exception is
    re-raised untouched.
    """
    try:
        return not_a_server_error(ctx, response, case)
    except Exception:
        # Path is resolved first, then the status code is compared, so the
        # evaluation order matches a short-circuiting ``path and status`` test.
        request_path = URL(response.request.url).path
        tolerated_paths = ["/v1/chat/completions/render", "/v1/chat/completions"]
        if request_path not in tolerated_paths:
            raise
        if response.status_code != HTTPStatus.NOT_IMPLEMENTED.value:
            raise
        return True
|
||||
|
||||
|
||||
@schema.parametrize()
|
||||
@schema.override(headers={"Content-Type": "application/json"})
|
||||
@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
|
||||
def test_openapi_stateless(case: schemathesis.Case):
|
||||
def test_openapi_stateless(case: Case):
|
||||
key = (
|
||||
case.operation.method.upper(),
|
||||
case.operation.path,
|
||||
@@ -155,4 +176,9 @@ def test_openapi_stateless(case: schemathesis.Case):
|
||||
}.get(key, DEFAULT_TIMEOUT_SECONDS)
|
||||
|
||||
# No need to verify SSL certificate for localhost
|
||||
case.call_and_validate(verify=False, timeout=timeout)
|
||||
case.call_and_validate(
|
||||
verify=False,
|
||||
timeout=timeout,
|
||||
additional_checks=(customized_not_a_server_error,),
|
||||
excluded_checks=(not_a_server_error,),
|
||||
)
|
||||
|
||||
@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
)
|
||||
from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
|
||||
from vllm.exceptions import VLLMValidationError
|
||||
from vllm.inputs import TokensPrompt
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
from vllm.renderers.hf import HfRenderer
|
||||
@@ -818,9 +819,8 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
|
||||
max_tokens=10,
|
||||
)
|
||||
|
||||
resp = await serving_chat.create_chat_completion(req)
|
||||
assert isinstance(resp, ErrorResponse)
|
||||
assert "context length is only" in resp.error.message
|
||||
with pytest.raises(VLLMValidationError):
|
||||
await serving_chat.create_chat_completion(req)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -860,9 +860,8 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
|
||||
max_tokens=1,
|
||||
)
|
||||
|
||||
resp = await serving_chat.create_chat_completion(req)
|
||||
assert isinstance(resp, ErrorResponse)
|
||||
assert "context length is only" in resp.error.message
|
||||
with pytest.raises(VLLMValidationError):
|
||||
await serving_chat.create_chat_completion(req)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user