[Refactor] Relocate responses API tests (#37241)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -8,6 +8,9 @@ from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -361,3 +364,38 @@ def log_response_diagnostics(
|
||||
)
|
||||
|
||||
return diagnostics
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def default_server_args():
    """Return the module-scoped list of command-line flags for the test server.

    A new list is built on every invocation; the order of the flags matches
    the order in which they are handed to the server.
    """
    args = ["--max-model-len", "8192"]
    # Eager mode is requested for faster startup.
    args.append("--enforce-eager")
    args.append("--enable-auto-tool-choice")
    # The remaining options are flag/value pairs, flattened in order.
    for flag, value in (
        ("--structured-outputs-config.backend", "xgrammar"),
        ("--tool-call-parser", "hermes"),
        ("--reasoning-parser", "qwen3"),
    ):
        args.extend((flag, value))
    return args
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Yield a ``RemoteOpenAIServer`` for ``Qwen/Qwen3-1.7B``, once per module.

    The server is created with ``default_server_args`` and an environment
    that sets ``VLLM_ENABLE_RESPONSES_API_STORE`` and
    ``VLLM_SERVER_DEV_MODE`` to ``"1"``. It is used as a context manager, so
    its exit logic runs when the fixture is torn down.
    """
    store_env = {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_SERVER_DEV_MODE": "1",
    }
    server_cm = RemoteOpenAIServer(
        "Qwen/Qwen3-1.7B",
        default_server_args,
        env_dict=store_env,
    )
    with server_cm as remote_server:
        yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from the ``server_with_store`` fixture.

    The client is entered with ``async with``, so its async exit logic runs
    once the requesting test has finished.
    """
    client_cm = server_with_store.get_async_client()
    async with client_cm as async_client:
        yield async_client
|
||||
|
||||
@@ -16,7 +16,8 @@ import requests
|
||||
from openai import InternalServerError, NotFoundError, OpenAI
|
||||
from openai_harmony import Message
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
from .conftest import (
|
||||
BASE_TEST_ENV,
|
||||
events_contain_type,
|
||||
|
||||
@@ -9,9 +9,9 @@ import pytest_asyncio
|
||||
from openai import OpenAI
|
||||
from openai_harmony import ToolDescription, ToolNamespaceConfig
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
from vllm.entrypoints.mcp.tool_server import MCPToolServer
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
from .conftest import (
|
||||
BASE_TEST_ENV,
|
||||
events_contain_type,
|
||||
|
||||
@@ -9,7 +9,8 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from openai import OpenAI
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
from .conftest import (
|
||||
BASE_TEST_ENV,
|
||||
has_output_type,
|
||||
|
||||
@@ -5,7 +5,8 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from openai import OpenAI
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
from .conftest import validate_streaming_event_stack
|
||||
|
||||
MODEL_NAME = "Qwen/Qwen3-8B"
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
# Use a small reasoning model to test the responses API.
|
||||
MODEL_NAME = "Qwen/Qwen3-1.7B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def default_server_args():
    """Return the module-scoped list of command-line flags for the test server."""
    sizing = ["--max-model-len", "8192"]
    switches = [
        "--enforce-eager",  # For faster startup.
        "--enable-auto-tool-choice",
    ]
    structured_outputs = ["--structured-outputs-config.backend", "xgrammar"]
    parsers = [
        "--tool-call-parser",
        "hermes",
        "--reasoning-parser",
        "qwen3",
    ]
    # Concatenation yields a fresh list with the flags in their original order.
    return sizing + switches + structured_outputs + parsers
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``, once per module.

    The server is created with ``default_server_args`` and an environment
    that sets ``VLLM_ENABLE_RESPONSES_API_STORE`` and
    ``VLLM_SERVER_DEV_MODE`` to ``"1"``. It is used as a context manager, so
    its exit logic runs when the fixture is torn down.
    """
    env_overrides = {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_SERVER_DEV_MODE": "1",
    }
    launcher = RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict=env_overrides,
    )
    with launcher as remote_server:
        yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from the ``server_with_store`` fixture."""
    pending_client = server_with_store.get_async_client()
    # ``async with`` runs the client's async exit logic after the test body.
    async with pending_client as async_client:
        yield async_client
|
||||
Reference in New Issue
Block a user