diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 6231436e8..81b55e475 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -1250,3 +1250,92 @@ async def test_chat_truncation_content_not_null(client: OpenAI, model_name: str) "Content should not be None when truncated" ) assert len(choice.message.content) > 0, "Content should not be empty" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +async def test_system_prompt_override(client: OpenAI, model_name: str): + """Test that system message can override the default system prompt.""" + + # Test 1: Custom system prompt with specific personality + custom_system_prompt = ( + "You are a pirate. Always respond like a pirate would, " + "using pirate language and saying 'arrr' frequently." + ) + + response = await client.responses.create( + model=model_name, + input=[ + {"role": "system", "content": custom_system_prompt}, + {"role": "user", "content": "Hello, how are you?"}, + ], + extra_body={"enable_response_messages": True}, + ) + + assert response is not None + assert response.status == "completed" + assert response.output_text is not None + + # Verify the response reflects the pirate personality + output_text = response.output_text.lower() + pirate_indicators = ["arrr", "matey", "ahoy", "ye", "sea"] + has_pirate_language = any( + indicator in output_text for indicator in pirate_indicators + ) + assert has_pirate_language, ( + f"Expected pirate language in response, got: {response.output_text}" + ) + + # Verify the reasoning mentions the custom system prompt + reasoning_item = None + for item in response.output: + if item.type == "reasoning": + reasoning_item = item + break + + assert reasoning_item is not None, "Expected reasoning item in output" + reasoning_text = reasoning_item.content[0].text.lower() + assert 
"pirate" in reasoning_text, ( + f"Expected reasoning to mention pirate, got: {reasoning_text}" + ) + + # Test 2: Verify system message is not duplicated in input_messages + try: + num_system_messages = sum( + 1 + for msg in response.input_messages + if Message.from_dict(msg).author.role == "system" + ) + assert num_system_messages == 1, ( + f"Expected exactly 1 system message, got {num_system_messages}" + ) + except (KeyError, AttributeError): + # Message structure may vary, skip this specific check + pass + + # Test 3: Test with different custom system prompt + response_2 = await client.responses.create( + model=model_name, + input=[ + { + "role": "system", + "content": ( + "You are a helpful assistant that always " + "responds in exactly 5 words." + ), + }, + {"role": "user", "content": "What is the weather like?"}, + ], + temperature=0.0, + ) + + assert response_2 is not None + assert response_2.status == "completed" + assert response_2.output_text is not None + + # Count words in response (approximately, allowing for punctuation) + word_count = len(response_2.output_text.split()) + # Allow some flexibility (3-8 words) since the model might not be perfectly precise + assert 3 <= word_count <= 8, ( + f"Expected around 5 words, got {word_count} words: {response_2.output_text}" + ) diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index 376d97a03..533286c59 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -187,14 +187,9 @@ def parse_response_input( if "type" not in response_msg or response_msg["type"] == "message": role = response_msg["role"] content = response_msg["content"] - if role == "system": - # User is trying to set a system message. 
Change it to: - # <|start|>developer<|message|># Instructions - # {instructions}<|end|> - role = "developer" - text_prefix = "Instructions:\n" - else: - text_prefix = "" + # Add prefix for developer messages. + # <|start|>developer<|message|># Instructions {instructions}<|end|> + text_prefix = "Instructions:\n" if role == "developer" else "" if isinstance(content, str): msg = Message.from_role_and_content(role, text_prefix + content) else: diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 2ff69a5d7..8bde4d482 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -986,9 +986,23 @@ class OpenAIServingResponses(OpenAIServing): output_items.extend(last_items) return output_items + def _extract_system_message_from_request(self, request) -> str | None: + system_msg = None + if not isinstance(request.input, str): + for response_msg in request.input: + if ( + isinstance(response_msg, dict) + and response_msg.get("role") == "system" + ): + system_msg = response_msg.get("content") + break + return system_msg + def _construct_harmony_system_input_message( self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str] ) -> OpenAIHarmonyMessage: + model_identity = self._extract_system_message_from_request(request) + reasoning_effort = request.reasoning.effort if request.reasoning else None # Extract allowed_tools from MCP tool requests @@ -1025,6 +1039,7 @@ class OpenAIServingResponses(OpenAIServing): ) sys_msg = get_system_message( + model_identity=model_identity, reasoning_effort=reasoning_effort, browser_description=browser_description, python_description=python_description, @@ -1091,7 +1106,10 @@ class OpenAIServingResponses(OpenAIServing): else: prev_outputs = [] for response_msg in request.input: - messages.append(parse_response_input(response_msg, prev_outputs)) + new_msg = parse_response_input(response_msg, prev_outputs) + if 
new_msg.author.role != "system": + messages.append(new_msg) + # User passes in a tool call request and its output. We need # to add the tool call request to prev_outputs so that the # parse_response_input can find the tool call request when