diff --git a/tests/entrypoints/openai/responses/conftest.py b/tests/entrypoints/openai/responses/conftest.py index 3d300849e..68fdbbba3 100644 --- a/tests/entrypoints/openai/responses/conftest.py +++ b/tests/entrypoints/openai/responses/conftest.py @@ -8,6 +8,9 @@ from collections.abc import Callable from typing import Any import pytest +import pytest_asyncio + +from tests.utils import RemoteOpenAIServer logger = logging.getLogger(__name__) @@ -361,3 +364,38 @@ def log_response_diagnostics( ) return diagnostics + + +@pytest.fixture(scope="module") +def default_server_args(): + return [ + "--max-model-len", + "8192", + "--enforce-eager", # For faster startup. + "--enable-auto-tool-choice", + "--structured-outputs-config.backend", + "xgrammar", + "--tool-call-parser", + "hermes", + "--reasoning-parser", + "qwen3", + ] + + +@pytest.fixture(scope="module") +def server_with_store(default_server_args): + with RemoteOpenAIServer( + "Qwen/Qwen3-1.7B", + default_server_args, + env_dict={ + "VLLM_ENABLE_RESPONSES_API_STORE": "1", + "VLLM_SERVER_DEV_MODE": "1", + }, + ) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def client(server_with_store): + async with server_with_store.get_async_client() as async_client: + yield async_client diff --git a/tests/v1/entrypoints/openai/serving_responses/test_basic.py b/tests/entrypoints/openai/responses/test_basic.py similarity index 100% rename from tests/v1/entrypoints/openai/serving_responses/test_basic.py rename to tests/entrypoints/openai/responses/test_basic.py diff --git a/tests/v1/entrypoints/openai/serving_responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py similarity index 100% rename from tests/v1/entrypoints/openai/serving_responses/test_function_call.py rename to tests/entrypoints/openai/responses/test_function_call.py diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index 3bc041ba4..74f3360df 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -16,7 +16,8 @@ import requests from openai import InternalServerError, NotFoundError, OpenAI from openai_harmony import Message -from ....utils import RemoteOpenAIServer +from tests.utils import RemoteOpenAIServer + from .conftest import ( BASE_TEST_ENV, events_contain_type, diff --git a/tests/v1/entrypoints/openai/serving_responses/test_image.py b/tests/entrypoints/openai/responses/test_image.py similarity index 100% rename from tests/v1/entrypoints/openai/serving_responses/test_image.py rename to tests/entrypoints/openai/responses/test_image.py diff --git a/tests/entrypoints/openai/responses/test_mcp_tools.py b/tests/entrypoints/openai/responses/test_mcp_tools.py index 55445f188..eb3c5becc 100644 --- a/tests/entrypoints/openai/responses/test_mcp_tools.py +++ b/tests/entrypoints/openai/responses/test_mcp_tools.py @@ -9,9 +9,9 @@ import pytest_asyncio from openai import OpenAI from openai_harmony import ToolDescription, ToolNamespaceConfig +from tests.utils import RemoteOpenAIServer from vllm.entrypoints.mcp.tool_server import MCPToolServer -from ....utils import RemoteOpenAIServer from .conftest import ( BASE_TEST_ENV, events_contain_type, diff --git a/tests/entrypoints/openai/responses/test_parsable_context.py b/tests/entrypoints/openai/responses/test_parsable_context.py index 280bacf47..292edda9a 100644 --- a/tests/entrypoints/openai/responses/test_parsable_context.py +++ b/tests/entrypoints/openai/responses/test_parsable_context.py @@ -9,7 +9,8 @@ import pytest import pytest_asyncio from openai import OpenAI -from ....utils import RemoteOpenAIServer +from tests.utils import RemoteOpenAIServer + from .conftest import ( BASE_TEST_ENV, has_output_type, diff --git a/tests/entrypoints/openai/responses/test_simple.py b/tests/entrypoints/openai/responses/test_simple.py index 744aa068a..1f382f61b 100644 --- a/tests/entrypoints/openai/responses/test_simple.py +++ b/tests/entrypoints/openai/responses/test_simple.py @@ -5,7 +5,8 @@ import pytest import pytest_asyncio from openai import OpenAI -from ....utils import RemoteOpenAIServer +from tests.utils import RemoteOpenAIServer + from .conftest import validate_streaming_event_stack MODEL_NAME = "Qwen/Qwen3-8B" diff --git a/tests/v1/entrypoints/openai/serving_responses/test_stateful.py b/tests/entrypoints/openai/responses/test_stateful.py similarity index 100% rename from tests/v1/entrypoints/openai/serving_responses/test_stateful.py rename to tests/entrypoints/openai/responses/test_stateful.py diff --git a/tests/v1/entrypoints/openai/serving_responses/test_structured_output.py b/tests/entrypoints/openai/responses/test_structured_output.py similarity index 100% rename from tests/v1/entrypoints/openai/serving_responses/test_structured_output.py rename to tests/entrypoints/openai/responses/test_structured_output.py diff --git a/tests/v1/entrypoints/openai/serving_responses/__init__.py b/tests/v1/entrypoints/openai/serving_responses/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/v1/entrypoints/openai/serving_responses/conftest.py b/tests/v1/entrypoints/openai/serving_responses/conftest.py deleted file mode 100644 index b948b6d05..000000000 --- a/tests/v1/entrypoints/openai/serving_responses/conftest.py +++ /dev/null @@ -1,44 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest -import pytest_asyncio - -from tests.utils import RemoteOpenAIServer - -# Use a small reasoning model to test the responses API. -MODEL_NAME = "Qwen/Qwen3-1.7B" - - -@pytest.fixture(scope="module") -def default_server_args(): - return [ - "--max-model-len", - "8192", - "--enforce-eager", # For faster startup. - "--enable-auto-tool-choice", - "--structured-outputs-config.backend", - "xgrammar", - "--tool-call-parser", - "hermes", - "--reasoning-parser", - "qwen3", - ] - - -@pytest.fixture(scope="module") -def server_with_store(default_server_args): - with RemoteOpenAIServer( - MODEL_NAME, - default_server_args, - env_dict={ - "VLLM_ENABLE_RESPONSES_API_STORE": "1", - "VLLM_SERVER_DEV_MODE": "1", - }, - ) as remote_server: - yield remote_server - - -@pytest_asyncio.fixture -async def client(server_with_store): - async with server_with_store.get_async_client() as async_client: - yield async_client