Implement OpenAI Responses API [1/N] (#20504)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-07-06 18:32:13 -07:00
committed by GitHub
parent c18b3b8e8b
commit 462b269280
12 changed files with 1106 additions and 8 deletions

View File

@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio
from tests.utils import RemoteOpenAIServer
# Use a small reasoning model to test the responses API.
# Chosen because it is tiny (0.6B params) yet emits reasoning traces
# the deepseek_r1 parser can split out.
MODEL_NAME: str = "Qwen/Qwen3-0.6B"
@pytest.fixture(scope="module")
def default_server_args():
    """Return the CLI arguments used to launch the test server.

    Small context window plus eager mode keep startup fast; the
    reasoning parser is required so the responses API can separate
    reasoning content from the final answer.
    """
    args = ["--max-model-len", "8192"]
    args.append("--enforce-eager")  # For faster startup.
    args.extend(["--reasoning-parser", "deepseek_r1"])
    return args
@pytest.fixture(scope="module")
def server(default_server_args):
    """Start one vLLM server for MODEL_NAME, shared by the whole module."""
    launcher = RemoteOpenAIServer(MODEL_NAME, default_server_args)
    with launcher as running_server:
        yield running_server
@pytest_asyncio.fixture
async def client(server):
    """Yield an async OpenAI client connected to the running test server."""
    client_cm = server.get_async_client()
    async with client_cm as openai_client:
        yield openai_client