[openai api] log exception in exception handler (1/N) (#31164)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
2026-03-06 00:00:12 +08:00
parent 612e7729c2
commit 176c799f4c
37 changed files with 908 additions and 1187 deletions
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
 )
 from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
 from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
+from vllm.exceptions import VLLMValidationError
 from vllm.inputs import TokensPrompt
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.renderers.hf import HfRenderer
@@ -818,9 +819,8 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
        max_tokens=10,
    )

-    resp = await serving_chat.create_chat_completion(req)
-    assert isinstance(resp, ErrorResponse)
-    assert "context length is only" in resp.error.message
+    with pytest.raises(VLLMValidationError):
+        await serving_chat.create_chat_completion(req)


@pytest.mark.asyncio
@@ -860,9 +860,8 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
        max_tokens=1,
    )

-    resp = await serving_chat.create_chat_completion(req)
-    assert isinstance(resp, ErrorResponse)
-    assert "context length is only" in resp.error.message
+    with pytest.raises(VLLMValidationError):
+        await serving_chat.create_chat_completion(req)


@pytest.mark.asyncio