[responsesAPI] parse reasoning item input (#28248)
Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: Andrew Xia <axia@fb.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
71
tests/entrypoints/openai/test_response_api_simple.py
Normal file
71
tests/entrypoints/openai/test_response_api_simple.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from openai import OpenAI
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
MODEL_NAME = "Qwen/Qwen3-8B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server():
|
||||
args = ["--reasoning-parser", "qwen3", "--max_model_len", "5000"]
|
||||
env_dict = dict(
|
||||
VLLM_ENABLE_RESPONSES_API_STORE="1",
|
||||
# uncomment for tool calling
|
||||
# PYTHON_EXECUTION_BACKEND="dangerously_use_uv",
|
||||
)
|
||||
|
||||
with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def client(server):
|
||||
async with server.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_basic(client: OpenAI, model_name: str):
|
||||
response = await client.responses.create(
|
||||
model=model_name,
|
||||
input="What is 13 * 24?",
|
||||
)
|
||||
assert response is not None
|
||||
print("response: ", response)
|
||||
assert response.status == "completed"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_reasoning_item(client: OpenAI, model_name: str):
|
||||
response = await client.responses.create(
|
||||
model=model_name,
|
||||
input=[
|
||||
{"type": "message", "content": "Hello.", "role": "user"},
|
||||
{
|
||||
"type": "reasoning",
|
||||
"id": "lol",
|
||||
"content": [
|
||||
{
|
||||
"type": "reasoning_text",
|
||||
"text": "We need to respond: greeting.",
|
||||
}
|
||||
],
|
||||
"summary": [],
|
||||
},
|
||||
],
|
||||
temperature=0.0,
|
||||
)
|
||||
assert response is not None
|
||||
assert response.status == "completed"
|
||||
# make sure we get a reasoning and text output
|
||||
assert response.output[0].type == "reasoning"
|
||||
assert response.output[1].type == "message"
|
||||
assert type(response.output[1].content[0].text) is str
|
||||
@@ -35,7 +35,7 @@ GET_WEATHER_SCHEMA = {
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server():
|
||||
args = ["--enforce-eager", "--tool-server", "demo"]
|
||||
args = ["--enforce-eager", "--tool-server", "demo", "--max_model_len", "5000"]
|
||||
env_dict = dict(
|
||||
VLLM_ENABLE_RESPONSES_API_STORE="1",
|
||||
PYTHON_EXECUTION_BACKEND="dangerously_use_uv",
|
||||
@@ -550,6 +550,31 @@ def call_function(name, args):
|
||||
raise ValueError(f"Unknown function: {name}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_reasoning_item(client: OpenAI, model_name: str):
|
||||
response = await client.responses.create(
|
||||
model=model_name,
|
||||
input=[
|
||||
{"type": "message", "content": "Hello.", "role": "user"},
|
||||
{
|
||||
"type": "reasoning",
|
||||
"id": "lol",
|
||||
"content": [
|
||||
{
|
||||
"type": "reasoning_text",
|
||||
"text": "We need to respond: greeting.",
|
||||
}
|
||||
],
|
||||
"summary": [],
|
||||
},
|
||||
],
|
||||
temperature=0.0,
|
||||
)
|
||||
assert response is not None
|
||||
assert response.status == "completed"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_function_calling(client: OpenAI, model_name: str):
|
||||
|
||||
@@ -1,7 +1,15 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content,
|
||||
ResponseReasoningItem,
|
||||
Summary,
|
||||
)
|
||||
|
||||
from vllm.entrypoints.responses_utils import (
|
||||
construct_chat_message_with_tool_call,
|
||||
convert_tool_responses_to_completions_format,
|
||||
)
|
||||
|
||||
@@ -28,3 +36,53 @@ class TestResponsesUtils:
|
||||
result = convert_tool_responses_to_completions_format(input_tool)
|
||||
|
||||
assert result == {"type": "function", "function": input_tool}
|
||||
|
||||
def test_construct_chat_message_with_tool_call(self):
|
||||
item = ResponseReasoningItem(
|
||||
id="lol",
|
||||
summary=[],
|
||||
type="reasoning",
|
||||
content=[
|
||||
Content(
|
||||
text="Leroy Jenkins",
|
||||
type="reasoning_text",
|
||||
)
|
||||
],
|
||||
encrypted_content=None,
|
||||
status=None,
|
||||
)
|
||||
formatted_item = construct_chat_message_with_tool_call(item)
|
||||
assert formatted_item["role"] == "assistant"
|
||||
assert formatted_item["reasoning"] == "Leroy Jenkins"
|
||||
|
||||
item = ResponseReasoningItem(
|
||||
id="lol",
|
||||
summary=[
|
||||
Summary(
|
||||
text='Hmm, the user has just started with a simple "Hello,"',
|
||||
type="summary_text",
|
||||
)
|
||||
],
|
||||
type="reasoning",
|
||||
content=None,
|
||||
encrypted_content=None,
|
||||
status=None,
|
||||
)
|
||||
|
||||
formatted_item = construct_chat_message_with_tool_call(item)
|
||||
assert formatted_item["role"] == "assistant"
|
||||
assert (
|
||||
formatted_item["reasoning"]
|
||||
== 'Hmm, the user has just started with a simple "Hello,"'
|
||||
)
|
||||
|
||||
item = ResponseReasoningItem(
|
||||
id="lol",
|
||||
summary=[],
|
||||
type="reasoning",
|
||||
content=None,
|
||||
encrypted_content="TOP_SECRET_MESSAGE",
|
||||
status=None,
|
||||
)
|
||||
with pytest.raises(ValueError):
|
||||
construct_chat_message_with_tool_call(item)
|
||||
|
||||
Reference in New Issue
Block a user