diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index b6842f3db..641171e3c 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -1302,16 +1302,17 @@ async def test_system_prompt_override(client: OpenAI, model_name: str): # Message structure may vary, skip this specific check pass + custom_system_prompt_2 = ( + "You are a helpful assistant that always responds in exactly 5 words." + ) + # Test 3: Test with different custom system prompt response_2 = await client.responses.create( model=model_name, input=[ { "role": "system", - "content": ( - "You are a helpful assistant that always " - "responds in exactly 5 words." - ), + "content": custom_system_prompt_2, }, {"role": "user", "content": "What is the weather like?"}, ], @@ -1328,3 +1329,27 @@ async def test_system_prompt_override(client: OpenAI, model_name: str): assert 3 <= word_count <= 8, ( f"Expected around 5 words, got {word_count} words: {response_2.output_text}" ) + + # Test 4: Test with structured content + response_3 = await client.responses.create( + model=model_name, + input=[ + { + "role": "system", + "content": [{"type": "input_text", "text": custom_system_prompt_2}], + }, + {"role": "user", "content": "What is the weather like?"}, + ], + temperature=0.0, + ) + + assert response_3 is not None + assert response_3.status == "completed" + assert response_3.output_text is not None + + # Count words in response (approximately, allowing for punctuation) + word_count = len(response_3.output_text.split()) + # Allow some flexibility (4-7 words) since the model might not be perfectly precise + assert 3 <= word_count <= 8, ( + f"Expected around 5 words, got {word_count} words: {response_3.output_text}" + ) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 9f54a8081..2af7f578e 100644 --- 
def _extract_system_message_from_request(
    self, request: ResponsesRequest
) -> str | None:
    """Return the text of the first system message in ``request.input``.

    Only dict-shaped input items whose ``role`` is ``"system"`` are
    considered, and only the first such item is inspected.

    The message ``content`` may be either:

    * a plain string, which is returned unchanged, or
    * a list of content parts, in which case the ``text`` of every part
      with ``type == "input_text"`` is collected and joined with newlines,
      so a system prompt split across several parts is not truncated to
      its first part.

    Args:
        request: The incoming request; only ``request.input`` is read.

    Returns:
        The system prompt text, or ``None`` when the input is a plain
        string, contains no system message, or the first system message
        carries no usable text.
    """
    if isinstance(request.input, str):
        # A plain-string input has no role-tagged items to inspect.
        return None

    for response_msg in request.input:
        if not (
            isinstance(response_msg, dict)
            and response_msg.get("role") == "system"
        ):
            continue

        # Only the first system message is consulted, whether or not it
        # ends up yielding any text.
        content = response_msg.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Skip parts with a missing/non-string ``text`` so the declared
            # ``str | None`` return type always holds.
            texts = [
                part["text"]
                for part in content
                if isinstance(part, dict)
                and part.get("type") == "input_text"
                and isinstance(part.get("text"), str)
            ]
            if texts:
                return "\n".join(texts)
        return None

    return None