[openai api] log exception in exception handler (1/N) (#31164)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2026-03-06 00:00:12 +08:00
committed by GitHub
parent 612e7729c2
commit 176c799f4c
37 changed files with 908 additions and 1187 deletions

View File

@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
 )
 from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
 from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
+from vllm.exceptions import VLLMValidationError
 from vllm.inputs import TokensPrompt
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.renderers.hf import HfRenderer
@@ -818,9 +819,8 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
 max_tokens=10,
 )
-resp = await serving_chat.create_chat_completion(req)
-assert isinstance(resp, ErrorResponse)
-assert "context length is only" in resp.error.message
+with pytest.raises(VLLMValidationError):
+await serving_chat.create_chat_completion(req)
 @pytest.mark.asyncio
@@ -860,9 +860,8 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
 max_tokens=1,
 )
-resp = await serving_chat.create_chat_completion(req)
-assert isinstance(resp, ErrorResponse)
-assert "context length is only" in resp.error.message
+with pytest.raises(VLLMValidationError):
+await serving_chat.create_chat_completion(req)
 @pytest.mark.asyncio