diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index 74f3360df..88dd2d384 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -999,17 +999,21 @@ async def test_mcp_tool_multi_turn(client: OpenAI, model_name: str, server): (msg.get("recipient") or "").startswith("python") for msg in response1.output_messages ) + parsed_output_messages = [ + Message.from_dict(msg) for msg in response1.output_messages + ] tool_response_found = any( - msg.get("author", {}).get("role") == "tool" - and (msg.get("author", {}).get("name") or "").startswith("python") - for msg in response1.output_messages + (msg.author.role == "tool" and (msg.author.name or "").startswith("python")) + for msg in parsed_output_messages ) assert tool_call_found, "MCP tool call not found in output_messages" assert tool_response_found, "MCP tool response not found in output_messages" # No developer messages expected for elevated tools developer_msgs = [ - msg for msg in response1.input_messages if msg["author"]["role"] == "developer" + msg + for msg in (Message.from_dict(raw) for raw in response1.input_messages) + if msg.author.role == "developer" ] assert len(developer_msgs) == 0, "No developer message expected for elevated tools" @@ -1119,12 +1123,10 @@ async def test_function_call_with_previous_input_messages( num_system = 0 num_developer = 0 num_tool = 0 - for msg_dict in response_2.input_messages: - # input_messages use {"author": {"role": "..."}} format, - # not the top-level {"role": "..."} that Message.from_dict - # expects. - author = msg_dict.get("author", {}) - role = author.get("role") if isinstance(author, dict) else None + for message in ( + Message.from_dict(msg_dict) for msg_dict in response_2.input_messages + ): + role = message.author.role if role == "system": num_system += 1 elif role == "developer": @@ -1183,12 +1185,8 @@ async def test_system_prompt_override_no_duplication(client: OpenAI, model_name: assert response.output_text is not None num_system = 0 - for msg in response.input_messages: - # input_messages use {"author": {"role": "system"}} format, - # not the top-level {"role": "system"} that Message.from_dict expects. - author = msg.get("author", {}) - role = author.get("role") if isinstance(author, dict) else None - if role == "system": + for message in (Message.from_dict(msg) for msg in response.input_messages): + if message.author.role == "system": num_system += 1 assert num_system == 1, f"Expected 1 system message, got {num_system}" diff --git a/tests/entrypoints/openai/responses/test_mcp_tools.py b/tests/entrypoints/openai/responses/test_mcp_tools.py index 763e2b208..330d4b9e4 100644 --- a/tests/entrypoints/openai/responses/test_mcp_tools.py +++ b/tests/entrypoints/openai/responses/test_mcp_tools.py @@ -7,7 +7,7 @@ from __future__ import annotations import pytest import pytest_asyncio from openai import OpenAI -from openai_harmony import ToolDescription, ToolNamespaceConfig +from openai_harmony import Message, ToolDescription, ToolNamespaceConfig from tests.utils import RemoteOpenAIServer from vllm.entrypoints.mcp.tool_server import MCPToolServer @@ -173,10 +173,10 @@ class TestMCPEnabled: if recipient and recipient.startswith("python"): tool_call_found = True assert message.get("channel") == "commentary" - author = message.get("author", {}) - if author.get("role") == "tool" and (author.get("name") or "").startswith( - "python" - ): + parsed_message = Message.from_dict(message) + if parsed_message.author.role == "tool" and ( + parsed_message.author.name or "" + ).startswith("python"): tool_response_found = True assert message.get("channel") == "commentary" @@ -188,7 +188,7 @@ class TestMCPEnabled: assert tool_response_found, "No Python tool response found" for message in response.input_messages: - assert message.get("author", {}).get("role") != "developer" + assert Message.from_dict(message).author.role != "developer" @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) diff --git a/tests/entrypoints/openai/responses/test_serving_responses.py b/tests/entrypoints/openai/responses/test_serving_responses.py index 39429cb9b..9e2b9a7fc 100644 --- a/tests/entrypoints/openai/responses/test_serving_responses.py +++ b/tests/entrypoints/openai/responses/test_serving_responses.py @@ -11,8 +11,12 @@ from openai.types.responses import ( ResponseReasoningItem, ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent, + ResponseTextConfig, ResponseTextDeltaEvent, ) +from openai.types.responses.response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig, +) from openai.types.responses.tool import ( CodeInterpreterContainerCodeInterpreterToolAuto, LocalShell, @@ -28,7 +32,13 @@ from vllm.entrypoints.openai.engine.protocol import ( RequestResponseMetadata, ) from vllm.entrypoints.openai.responses.context import ConversationContext, SimpleContext -from vllm.entrypoints.openai.responses.protocol import ResponsesRequest +from vllm.entrypoints.openai.responses.protocol import ( + ResponseCreatedEvent, + ResponseRawMessageAndToken, + ResponsesRequest, + ResponsesResponse, + serialize_message, +) from vllm.entrypoints.openai.responses.serving import ( OpenAIServingResponses, _extract_allowed_tools_from_mcp_requests, @@ -73,6 +83,16 @@ class MockConversationContext(ConversationContext): pass +def test_serialize_message_pydantic_model_returns_dict() -> None: + msg = ResponseRawMessageAndToken(message="hello", tokens=[1, 2, 3]) + + serialized = serialize_message(msg) + + assert isinstance(serialized, dict) + assert serialized["type"] == "raw_message_tokens" + assert serialized["message"] == "hello" + + @pytest.fixture def mock_serving_responses(): """Create a mock OpenAIServingResponses instance""" @@ -132,6 +152,56 @@ def test_extract_tool_types(monkeypatch: pytest.MonkeyPatch) -> None: } +@pytest.mark.skip_global_cleanup +def test_response_created_event_uses_public_json_schema_alias() -> None: + schema = { + "type": "object", + "properties": { + "event_name": {"type": "string"}, + "date": {"type": "string"}, + "participants": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["event_name", "date", "participants"], + "additionalProperties": False, + } + text = ResponseTextConfig() + text.format = ResponseFormatTextJSONSchemaConfig( + type="json_schema", + name="calendar_event", + schema=schema, + description="A calendar event.", + strict=True, + ) + request = ResponsesRequest( + model="test-model", + input="Alice and Bob are going to a science fair on Friday.", + text=text, + ) + sampling_params = request.to_sampling_params(default_max_tokens=64) + initial_response = ResponsesResponse.from_request( + request=request, + sampling_params=sampling_params, + model_name="test-model", + created_time=0, + output=[], + status="in_progress", + usage=None, + ).model_dump(mode="json", by_alias=True) + + fmt = initial_response["text"]["format"] + assert fmt["schema"] == schema + assert "schema_" not in fmt + + event = ResponseCreatedEvent( + type="response.created", + sequence_number=0, + response=initial_response, + ) + assert event.response.text is not None + assert event.response.text.format is not None + assert event.response.text.format.model_dump(by_alias=True)["schema"] == schema + + class TestInitializeToolSessions: """Test class for _initialize_tool_sessions method""" diff --git a/vllm/entrypoints/openai/responses/api_router.py b/vllm/entrypoints/openai/responses/api_router.py index 88d821260..61077f1a7 100644 --- a/vllm/entrypoints/openai/responses/api_router.py +++ b/vllm/entrypoints/openai/responses/api_router.py @@ -39,7 +39,8 @@ async def _convert_stream_to_sse_events( event_type = getattr(event, "type", "unknown") # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format event_data = ( - f"event: {event_type}\ndata: {event.model_dump_json(indent=None)}\n\n" + f"event: {event_type}\ndata: " + f"{event.model_dump_json(indent=None, by_alias=True)}\n\n" ) yield event_data @@ -65,10 +66,11 @@ async def create_responses(request: ResponsesRequest, raw_request: Request): if isinstance(generator, ErrorResponse): return JSONResponse( - content=generator.model_dump(), status_code=generator.error.code + content=generator.model_dump(mode="json", by_alias=True), + status_code=generator.error.code, ) elif isinstance(generator, ResponsesResponse): - return JSONResponse(content=generator.model_dump()) + return JSONResponse(content=generator.model_dump(mode="json", by_alias=True)) return StreamingResponse( content=_convert_stream_to_sse_events(generator), media_type="text/event-stream" @@ -95,10 +97,11 @@ async def retrieve_responses( if isinstance(response, ErrorResponse): return JSONResponse( - content=response.model_dump(), status_code=response.error.code + content=response.model_dump(mode="json", by_alias=True), + status_code=response.error.code, ) elif isinstance(response, ResponsesResponse): - return JSONResponse(content=response.model_dump()) + return JSONResponse(content=response.model_dump(mode="json", by_alias=True)) return StreamingResponse( content=_convert_stream_to_sse_events(response), media_type="text/event-stream" ) @@ -115,9 +118,10 @@ async def cancel_responses(response_id: str, raw_request: Request): if isinstance(response, ErrorResponse): return JSONResponse( - content=response.model_dump(), status_code=response.error.code + content=response.model_dump(mode="json", by_alias=True), + status_code=response.error.code, ) - return JSONResponse(content=response.model_dump()) + return JSONResponse(content=response.model_dump(mode="json", by_alias=True)) def attach_router(app: FastAPI): diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py index d34ba2d75..79f5894fb 100644 --- a/vllm/entrypoints/openai/responses/protocol.py +++ b/vllm/entrypoints/openai/responses/protocol.py @@ -106,7 +106,7 @@ def serialize_message(msg): return msg.to_dict() else: # fallback to pydantic dump - return msg.model_dump_json(by_alias=True) + return msg.model_dump(mode="json", by_alias=True) def serialize_messages(msgs): diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 7edeb8ab0..3241e491a 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1909,7 +1909,7 @@ class OpenAIServingResponses(OpenAIServing): output=[], status="in_progress", usage=None, - ).model_dump() + ).model_dump(mode="json", by_alias=True) yield _increment_sequence_number_and_return( ResponseCreatedEvent( type="response.created",