diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 1c43c404d..407e3c5a6 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -333,15 +333,15 @@ apply_rocm_test_overrides() { # --- Entrypoint ignores --- if [[ $cmds == *" entrypoints/openai "* ]]; then cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \ - --ignore=entrypoints/openai/test_audio.py \ - --ignore=entrypoints/openai/test_shutdown.py \ + --ignore=entrypoints/openai/chat_completion/test_audio.py \ + --ignore=entrypoints/openai/completion/test_shutdown.py \ --ignore=entrypoints/openai/test_completion.py \ --ignore=entrypoints/openai/test_models.py \ --ignore=entrypoints/openai/test_lora_adapters.py \ --ignore=entrypoints/openai/test_return_tokens_as_ids.py \ - --ignore=entrypoints/openai/test_root_path.py \ + --ignore=entrypoints/openai/chat_completion/test_root_path.py \ --ignore=entrypoints/openai/test_tokenization.py \ - --ignore=entrypoints/openai/test_prompt_validation.py "} + --ignore=entrypoints/openai/completion/test_prompt_validation.py "} fi if [[ $cmds == *" entrypoints/llm "* ]]; then diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 7f8020540..eb331aaf9 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -162,7 +162,7 @@ steps: - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (API Server 2) @@ -674,12 +674,12 @@ steps: - vllm/config/model.py - vllm/model_executor - tests/model_executor - - tests/entrypoints/openai/test_tensorizer_entrypoint.py + - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py + - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py - label: Benchmarks # 11min timeout_in_minutes: 20 @@ -1143,7 +1143,7 @@ steps: - pytest -v -s plugins_tests/test_scheduler_plugins.py - pip install -e ./plugins/vllm_add_dummy_model - pytest -v -s distributed/test_distributed_oot.py - - pytest -v -s entrypoints/openai/test_oot_registration.py + - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py - pytest -v -s models/test_oot_registration.py - pytest -v -s plugins/lora_resolvers @@ -1502,7 +1502,7 @@ steps: - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (API Server 2) @@ -2133,12 +2133,12 @@ steps: - vllm/config/model.py - vllm/model_executor - tests/model_executor - - tests/entrypoints/openai/test_tensorizer_entrypoint.py + - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py + - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py - label: Benchmarks # 11min timeout_in_minutes: 20 @@ -2735,7 +2735,7 @@ steps: - pytest -v -s plugins_tests/test_scheduler_plugins.py - pip install -e ./plugins/vllm_add_dummy_model - pytest -v -s distributed/test_distributed_oot.py - - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process + - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins @@ -3257,7 +3257,7 @@ steps: - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (API Server 2) @@ -3872,12 +3872,12 @@ steps: - vllm/config/model.py - vllm/model_executor - tests/model_executor - - tests/entrypoints/openai/test_tensorizer_entrypoint.py + - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py + - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py - label: Benchmarks # 11min timeout_in_minutes: 20 @@ -4508,7 +4508,7 @@ steps: - pytest -v -s plugins_tests/test_scheduler_plugins.py - pip install -e ./plugins/vllm_add_dummy_model - pytest -v -s distributed/test_distributed_oot.py - - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process + - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml index 9de9c3fd2..ac6be8e14 100644 --- a/.buildkite/test_areas/entrypoints.yaml +++ b/.buildkite/test_areas/entrypoints.yaml @@ -34,7 +34,7 @@ steps: - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py mirror: amd: diff --git a/.buildkite/test_areas/model_executor.yaml b/.buildkite/test_areas/model_executor.yaml index 996c8bb8b..496ecca39 100644 --- a/.buildkite/test_areas/model_executor.yaml +++ b/.buildkite/test_areas/model_executor.yaml @@ -9,9 +9,9 @@ steps: - vllm/config/model.py - vllm/model_executor - tests/model_executor - - tests/entrypoints/openai/test_tensorizer_entrypoint.py + - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py + - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py diff --git a/.buildkite/test_areas/plugins.yaml b/.buildkite/test_areas/plugins.yaml index 7e7727fce..8e0eb0284 100644 --- a/.buildkite/test_areas/plugins.yaml +++ b/.buildkite/test_areas/plugins.yaml @@ -36,6 +36,6 @@ steps: - pytest -v -s plugins_tests/test_scheduler_plugins.py - pip install -e ./plugins/vllm_add_dummy_model - pytest -v -s distributed/test_distributed_oot.py - - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process + - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins diff --git a/.github/mergify.yml b/.github/mergify.yml index c6d1f1fed..8e9cb790b 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -381,7 +381,7 @@ pull_request_rules: - or: - files~=^vllm/model_executor/model_loader/tensorizer.py - files~=^vllm/model_executor/model_loader/tensorizer_loader.py - - files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py + - files~=^tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py - files~=^tests/model_executor/model_loader/tensorizer_loader/ actions: assign: diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 9014ab1ea..9a7bd9f59 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -50,7 +50,7 @@ av==16.1.0 blobfile==3.0.0 # Multi-Modal Models Test decord==0.6.0 - # video processing, required by entrypoints/openai/test_video.py + # video processing, required by entrypoints/openai/chat_completion/test_video.py rapidfuzz==3.12.1 # OpenAI compatibility and testing diff --git a/tests/distributed/test_distributed_oot.py b/tests/distributed/test_distributed_oot.py index ea7a88abd..9bd7603e7 100644 --- a/tests/distributed/test_distributed_oot.py +++ b/tests/distributed/test_distributed_oot.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from ..entrypoints.openai.test_oot_registration import run_and_test_dummy_opt_api_server +from tests.entrypoints.openai.chat_completion.test_oot_registration import ( + run_and_test_dummy_opt_api_server, +) def test_distributed_oot(dummy_opt_path: str): diff --git a/tests/entrypoints/llm/test_chat.py b/tests/entrypoints/llm/test_chat.py index 20ed73e26..7d8a09852 100644 --- a/tests/entrypoints/llm/test_chat.py +++ b/tests/entrypoints/llm/test_chat.py @@ -4,12 +4,11 @@ import weakref import pytest +from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS from vllm import LLM from vllm.distributed import cleanup_dist_env_and_memory from vllm.sampling_params import SamplingParams -from ..openai.test_vision import TEST_IMAGE_ASSETS - @pytest.fixture(scope="function") def text_llm(): diff --git a/tests/entrypoints/llm/test_mm_cache_stats.py b/tests/entrypoints/llm/test_mm_cache_stats.py index e5ee99124..62c6aa9f7 100644 --- a/tests/entrypoints/llm/test_mm_cache_stats.py +++ b/tests/entrypoints/llm/test_mm_cache_stats.py @@ -6,13 +6,12 @@ import logging import pytest import regex as re +from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS from vllm import LLM from vllm.entrypoints.chat_utils import ChatCompletionMessageParam from vllm.v1.metrics import loggers as stat_loggers from vllm.v1.metrics.reader import Counter, Metric -from ..openai.test_vision import TEST_IMAGE_ASSETS - def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]: return [ diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/chat_completion/test_audio.py similarity index 99% rename from tests/entrypoints/openai/test_audio.py rename to tests/entrypoints/openai/chat_completion/test_audio.py index 9fe1d906d..fa0f141af 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/chat_completion/test_audio.py @@ -7,11 +7,10 @@ import openai import pytest import pytest_asyncio +from tests.utils import RemoteOpenAIServer from vllm.assets.audio import AudioAsset from vllm.multimodal.utils import encode_audio_base64, encode_audio_url, fetch_audio -from ...utils import RemoteOpenAIServer - MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b" TEST_AUDIO_URLS = [ AudioAsset("winning_call").url, diff --git a/tests/entrypoints/openai/test_audio_in_video.py b/tests/entrypoints/openai/chat_completion/test_audio_in_video.py similarity index 98% rename from tests/entrypoints/openai/test_audio_in_video.py rename to tests/entrypoints/openai/chat_completion/test_audio_in_video.py index 334d9a71e..769390309 100644 --- a/tests/entrypoints/openai/test_audio_in_video.py +++ b/tests/entrypoints/openai/chat_completion/test_audio_in_video.py @@ -8,8 +8,8 @@ import openai import pytest import pytest_asyncio -from ...conftest import VideoTestAssets -from ...utils import RemoteOpenAIServer +from tests.conftest import VideoTestAssets +from tests.utils import RemoteOpenAIServer MODEL_NAME = "Qwen/Qwen2.5-Omni-3B" diff --git a/tests/entrypoints/openai/test_default_mm_loras.py b/tests/entrypoints/openai/chat_completion/test_default_mm_loras.py similarity index 97% rename from tests/entrypoints/openai/test_default_mm_loras.py rename to tests/entrypoints/openai/chat_completion/test_default_mm_loras.py index dd8f9d67d..e285c8d31 100644 --- a/tests/entrypoints/openai/test_default_mm_loras.py +++ b/tests/entrypoints/openai/chat_completion/test_default_mm_loras.py @@ -8,8 +8,8 @@ import pytest import pytest_asyncio from huggingface_hub import snapshot_download -from ...conftest import AudioTestAssets -from ...utils import RemoteOpenAIServer +from tests.conftest import AudioTestAssets +from tests.utils import RemoteOpenAIServer # NOTE - the tests in this module are currently analogous to test_chat, but are # separated to avoid OOM killing due to module-scoped servers, since we diff --git a/tests/entrypoints/openai/test_oot_registration.py b/tests/entrypoints/openai/chat_completion/test_oot_registration.py similarity index 96% rename from tests/entrypoints/openai/test_oot_registration.py rename to tests/entrypoints/openai/chat_completion/test_oot_registration.py index ba463be1d..151373d82 100644 --- a/tests/entrypoints/openai/test_oot_registration.py +++ b/tests/entrypoints/openai/chat_completion/test_oot_registration.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from ...utils import VLLM_PATH, RemoteOpenAIServer +from tests.utils import VLLM_PATH, RemoteOpenAIServer chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja" assert chatml_jinja_path.exists() diff --git a/tests/entrypoints/openai/test_root_path.py b/tests/entrypoints/openai/chat_completion/test_root_path.py similarity index 98% rename from tests/entrypoints/openai/test_root_path.py rename to tests/entrypoints/openai/chat_completion/test_root_path.py index 6bcb80878..9b3f30255 100644 --- a/tests/entrypoints/openai/test_root_path.py +++ b/tests/entrypoints/openai/chat_completion/test_root_path.py @@ -8,7 +8,7 @@ from typing import Any, NamedTuple import openai # use the official client for correctness check import pytest -from ...utils import RemoteOpenAIServer +from tests.utils import RemoteOpenAIServer # # any model with a chat template should work here MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct" diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/chat_completion/test_video.py similarity index 99% rename from tests/entrypoints/openai/test_video.py rename to tests/entrypoints/openai/chat_completion/test_video.py index 47450c30b..a5827c9f9 100644 --- a/tests/entrypoints/openai/test_video.py +++ b/tests/entrypoints/openai/chat_completion/test_video.py @@ -7,11 +7,10 @@ import openai import pytest import pytest_asyncio +from tests.utils import RemoteOpenAIServer from vllm.multimodal.utils import encode_video_url, fetch_video from vllm.platforms import current_platform -from ...utils import RemoteOpenAIServer - MODEL_NAME = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" MAXIMUM_VIDEOS = 3 diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/chat_completion/test_vision.py similarity index 99% rename from tests/entrypoints/openai/test_vision.py rename to tests/entrypoints/openai/chat_completion/test_vision.py index c0d8b0532..6cb843342 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/chat_completion/test_vision.py @@ -8,12 +8,11 @@ import pytest import pytest_asyncio from transformers import AutoProcessor +from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer from vllm.multimodal.media import MediaWithBytes from vllm.multimodal.utils import encode_image_url, fetch_image from vllm.platforms import current_platform -from ...utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer - MODEL_NAME = "microsoft/Phi-3.5-vision-instruct" MAXIMUM_IMAGES = 2 diff --git a/tests/entrypoints/openai/test_vision_embeds.py b/tests/entrypoints/openai/chat_completion/test_vision_embeds.py similarity index 99% rename from tests/entrypoints/openai/test_vision_embeds.py rename to tests/entrypoints/openai/chat_completion/test_vision_embeds.py index b3da30102..82cb84bcc 100644 --- a/tests/entrypoints/openai/test_vision_embeds.py +++ b/tests/entrypoints/openai/chat_completion/test_vision_embeds.py @@ -8,10 +8,9 @@ import pytest import requests import torch +from tests.utils import RemoteOpenAIServer from vllm.utils.serial_utils import tensor2base64 -from ...utils import RemoteOpenAIServer - @pytest.mark.parametrize( "model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] diff --git a/tests/entrypoints/openai/completion/__init__.py b/tests/entrypoints/openai/completion/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/entrypoints/openai/test_completion_error.py b/tests/entrypoints/openai/completion/test_completion_error.py similarity index 100% rename from tests/entrypoints/openai/test_completion_error.py rename to tests/entrypoints/openai/completion/test_completion_error.py diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/completion/test_completion_with_prompt_embeds.py similarity index 99% rename from tests/entrypoints/openai/test_completion_with_prompt_embeds.py rename to tests/entrypoints/openai/completion/test_completion_with_prompt_embeds.py index f8a19e40b..374e77245 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/completion/test_completion_with_prompt_embeds.py @@ -14,7 +14,7 @@ import torch from openai import BadRequestError from transformers import AutoConfig -from ...utils import RemoteOpenAIServer +from tests.utils import RemoteOpenAIServer # any model with a chat template should work here MODEL_NAME = "facebook/opt-125m" diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/completion/test_lora_resolvers.py similarity index 100% rename from tests/entrypoints/openai/test_lora_resolvers.py rename to tests/entrypoints/openai/completion/test_lora_resolvers.py diff --git a/tests/entrypoints/openai/test_prompt_validation.py b/tests/entrypoints/openai/completion/test_prompt_validation.py similarity index 98% rename from tests/entrypoints/openai/test_prompt_validation.py rename to tests/entrypoints/openai/completion/test_prompt_validation.py index 5aff3b3c7..f44d13c55 100644 --- a/tests/entrypoints/openai/test_prompt_validation.py +++ b/tests/entrypoints/openai/completion/test_prompt_validation.py @@ -11,11 +11,10 @@ import pytest import regex as re import torch +from tests.utils import RemoteOpenAIServer from vllm.config import ModelConfig from vllm.renderers.embed_utils import safe_load_prompt_embeds -from ...utils import RemoteOpenAIServer - @pytest.mark.asyncio async def test_empty_prompt(): diff --git a/tests/entrypoints/openai/test_shutdown.py b/tests/entrypoints/openai/completion/test_shutdown.py similarity index 100% rename from tests/entrypoints/openai/test_shutdown.py rename to tests/entrypoints/openai/completion/test_shutdown.py diff --git a/tests/entrypoints/openai/test_tensorizer_entrypoint.py b/tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py similarity index 98% rename from tests/entrypoints/openai/test_tensorizer_entrypoint.py rename to tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py index 9ac9106db..29c0c2dc8 100644 --- a/tests/entrypoints/openai/test_tensorizer_entrypoint.py +++ b/tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py @@ -9,6 +9,7 @@ import pytest import pytest_asyncio import torch.cuda +from tests.utils import RemoteOpenAIServer from vllm.engine.arg_utils import EngineArgs from vllm.model_executor.model_loader.tensorizer import ( TensorizerConfig, @@ -17,8 +18,6 @@ from vllm.model_executor.model_loader.tensorizer import ( ) from vllm.platforms import current_platform -from ...utils import RemoteOpenAIServer - MODEL_NAME = "unsloth/llama-3.2-1b-Instruct" LORA_PATH = "davzoku/finqa_adapter_1b" diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/completion/test_token_in_token_out.py similarity index 98% rename from tests/entrypoints/openai/test_token_in_token_out.py rename to tests/entrypoints/openai/completion/test_token_in_token_out.py index c7f8abe27..8882ae624 100644 --- a/tests/entrypoints/openai/test_token_in_token_out.py +++ b/tests/entrypoints/openai/completion/test_token_in_token_out.py @@ -6,11 +6,10 @@ import tempfile import pytest +from tests.utils import RemoteOpenAIServer from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf from vllm.tokenizers import get_tokenizer -from ...utils import RemoteOpenAIServer - MODEL_NAME = "Qwen/Qwen3-0.6B" MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")