[Refactor] Relocate responses API tests (#37241)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-17 01:14:52 -04:00
committed by GitHub
parent 54a62a79f7
commit 3e3d320c1b
12 changed files with 45 additions and 48 deletions

View File

@@ -8,6 +8,9 @@ from collections.abc import Callable
from typing import Any
import pytest
import pytest_asyncio
from tests.utils import RemoteOpenAIServer
logger = logging.getLogger(__name__)
@@ -361,3 +364,38 @@ def log_response_diagnostics(
)
return diagnostics
@pytest.fixture(scope="module")
def default_server_args():
    """Baseline CLI arguments for the responses-API test server.

    Configures a short context, eager execution, auto tool choice with the
    hermes parser, xgrammar-backed structured outputs, and the qwen3
    reasoning parser.
    """
    grouped_args = [
        ("--max-model-len", "8192"),
        # Eager mode skips graph capture, so the server boots faster.
        ("--enforce-eager",),
        ("--enable-auto-tool-choice",),
        ("--structured-outputs-config.backend", "xgrammar"),
        ("--tool-call-parser", "hermes"),
        ("--reasoning-parser", "qwen3"),
    ]
    return [token for group in grouped_args for token in group]
@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Launch a RemoteOpenAIServer with the responses-API store enabled.

    The extra environment variables enable response storage and dev-mode
    behavior; the server lives for the whole test module.
    """
    extra_env = {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_SERVER_DEV_MODE": "1",
    }
    with RemoteOpenAIServer(
        "Qwen/Qwen3-1.7B",
        default_server_args,
        env_dict=extra_env,
    ) as server:
        yield server
@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async OpenAI client bound to the module-scoped server."""
    async with server_with_store.get_async_client() as api_client:
        yield api_client

View File

@@ -16,7 +16,8 @@ import requests
from openai import InternalServerError, NotFoundError, OpenAI
from openai_harmony import Message
from ....utils import RemoteOpenAIServer
from tests.utils import RemoteOpenAIServer
from .conftest import (
BASE_TEST_ENV,
events_contain_type,

View File

@@ -9,9 +9,9 @@ import pytest_asyncio
from openai import OpenAI
from openai_harmony import ToolDescription, ToolNamespaceConfig
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.mcp.tool_server import MCPToolServer
from ....utils import RemoteOpenAIServer
from .conftest import (
BASE_TEST_ENV,
events_contain_type,

View File

@@ -9,7 +9,8 @@ import pytest
import pytest_asyncio
from openai import OpenAI
from ....utils import RemoteOpenAIServer
from tests.utils import RemoteOpenAIServer
from .conftest import (
BASE_TEST_ENV,
has_output_type,

View File

@@ -5,7 +5,8 @@ import pytest
import pytest_asyncio
from openai import OpenAI
from ....utils import RemoteOpenAIServer
from tests.utils import RemoteOpenAIServer
from .conftest import validate_streaming_event_stack
MODEL_NAME = "Qwen/Qwen3-8B"

View File

@@ -1,44 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio
from tests.utils import RemoteOpenAIServer
# Use a small reasoning model to test the responses API.
# Shared by the module-scoped server fixture below.
MODEL_NAME = "Qwen/Qwen3-1.7B"
@pytest.fixture(scope="module")
def default_server_args():
    """Baseline CLI arguments for the responses-API test server.

    Configures a short context, eager execution, auto tool choice with the
    hermes parser, xgrammar-backed structured outputs, and the qwen3
    reasoning parser.
    """
    grouped_args = [
        ("--max-model-len", "8192"),
        # Eager mode skips graph capture, so the server boots faster.
        ("--enforce-eager",),
        ("--enable-auto-tool-choice",),
        ("--structured-outputs-config.backend", "xgrammar"),
        ("--tool-call-parser", "hermes"),
        ("--reasoning-parser", "qwen3"),
    ]
    return [token for group in grouped_args for token in group]
@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Launch a RemoteOpenAIServer with the responses-API store enabled.

    The extra environment variables enable response storage and dev-mode
    behavior; the server lives for the whole test module.
    """
    extra_env = {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_SERVER_DEV_MODE": "1",
    }
    with RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict=extra_env,
    ) as server:
        yield server
@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async OpenAI client bound to the module-scoped server."""
    async with server_with_store.get_async_client() as api_client:
        yield api_client