[Refactor] Relocate completion and chat completion tests (#37125)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -333,15 +333,15 @@ apply_rocm_test_overrides() {
|
|||||||
# --- Entrypoint ignores ---
|
# --- Entrypoint ignores ---
|
||||||
if [[ $cmds == *" entrypoints/openai "* ]]; then
|
if [[ $cmds == *" entrypoints/openai "* ]]; then
|
||||||
cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \
|
cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \
|
||||||
--ignore=entrypoints/openai/test_audio.py \
|
--ignore=entrypoints/openai/chat_completion/test_audio.py \
|
||||||
--ignore=entrypoints/openai/test_shutdown.py \
|
--ignore=entrypoints/openai/completion/test_shutdown.py \
|
||||||
--ignore=entrypoints/openai/test_completion.py \
|
--ignore=entrypoints/openai/test_completion.py \
|
||||||
--ignore=entrypoints/openai/test_models.py \
|
--ignore=entrypoints/openai/test_models.py \
|
||||||
--ignore=entrypoints/openai/test_lora_adapters.py \
|
--ignore=entrypoints/openai/test_lora_adapters.py \
|
||||||
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \
|
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \
|
||||||
--ignore=entrypoints/openai/test_root_path.py \
|
--ignore=entrypoints/openai/chat_completion/test_root_path.py \
|
||||||
--ignore=entrypoints/openai/test_tokenization.py \
|
--ignore=entrypoints/openai/test_tokenization.py \
|
||||||
--ignore=entrypoints/openai/test_prompt_validation.py "}
|
--ignore=entrypoints/openai/completion/test_prompt_validation.py "}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $cmds == *" entrypoints/llm "* ]]; then
|
if [[ $cmds == *" entrypoints/llm "* ]]; then
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ steps:
|
|||||||
- tests/entrypoints/test_chat_utils
|
- tests/entrypoints/test_chat_utils
|
||||||
commands:
|
commands:
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||||
- pytest -v -s entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/test_chat_utils.py
|
||||||
|
|
||||||
- label: Entrypoints Integration Test (API Server 2)
|
- label: Entrypoints Integration Test (API Server 2)
|
||||||
@@ -674,12 +674,12 @@ steps:
|
|||||||
- vllm/config/model.py
|
- vllm/config/model.py
|
||||||
- vllm/model_executor
|
- vllm/model_executor
|
||||||
- tests/model_executor
|
- tests/model_executor
|
||||||
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
commands:
|
commands:
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s model_executor
|
- pytest -v -s model_executor
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
|
|
||||||
- label: Benchmarks # 11min
|
- label: Benchmarks # 11min
|
||||||
timeout_in_minutes: 20
|
timeout_in_minutes: 20
|
||||||
@@ -1143,7 +1143,7 @@ steps:
|
|||||||
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
||||||
- pip install -e ./plugins/vllm_add_dummy_model
|
- pip install -e ./plugins/vllm_add_dummy_model
|
||||||
- pytest -v -s distributed/test_distributed_oot.py
|
- pytest -v -s distributed/test_distributed_oot.py
|
||||||
- pytest -v -s entrypoints/openai/test_oot_registration.py
|
- pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py
|
||||||
- pytest -v -s models/test_oot_registration.py
|
- pytest -v -s models/test_oot_registration.py
|
||||||
- pytest -v -s plugins/lora_resolvers
|
- pytest -v -s plugins/lora_resolvers
|
||||||
|
|
||||||
@@ -1502,7 +1502,7 @@ steps:
|
|||||||
- tests/entrypoints/test_chat_utils
|
- tests/entrypoints/test_chat_utils
|
||||||
commands:
|
commands:
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||||
- pytest -v -s entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/test_chat_utils.py
|
||||||
|
|
||||||
- label: Entrypoints Integration Test (API Server 2)
|
- label: Entrypoints Integration Test (API Server 2)
|
||||||
@@ -2133,12 +2133,12 @@ steps:
|
|||||||
- vllm/config/model.py
|
- vllm/config/model.py
|
||||||
- vllm/model_executor
|
- vllm/model_executor
|
||||||
- tests/model_executor
|
- tests/model_executor
|
||||||
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
commands:
|
commands:
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s model_executor
|
- pytest -v -s model_executor
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
|
|
||||||
- label: Benchmarks # 11min
|
- label: Benchmarks # 11min
|
||||||
timeout_in_minutes: 20
|
timeout_in_minutes: 20
|
||||||
@@ -2735,7 +2735,7 @@ steps:
|
|||||||
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
||||||
- pip install -e ./plugins/vllm_add_dummy_model
|
- pip install -e ./plugins/vllm_add_dummy_model
|
||||||
- pytest -v -s distributed/test_distributed_oot.py
|
- pytest -v -s distributed/test_distributed_oot.py
|
||||||
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
|
- pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
||||||
|
|
||||||
@@ -3257,7 +3257,7 @@ steps:
|
|||||||
- tests/entrypoints/test_chat_utils
|
- tests/entrypoints/test_chat_utils
|
||||||
commands:
|
commands:
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||||
- pytest -v -s entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/test_chat_utils.py
|
||||||
|
|
||||||
- label: Entrypoints Integration Test (API Server 2)
|
- label: Entrypoints Integration Test (API Server 2)
|
||||||
@@ -3872,12 +3872,12 @@ steps:
|
|||||||
- vllm/config/model.py
|
- vllm/config/model.py
|
||||||
- vllm/model_executor
|
- vllm/model_executor
|
||||||
- tests/model_executor
|
- tests/model_executor
|
||||||
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
commands:
|
commands:
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s model_executor
|
- pytest -v -s model_executor
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
|
|
||||||
- label: Benchmarks # 11min
|
- label: Benchmarks # 11min
|
||||||
timeout_in_minutes: 20
|
timeout_in_minutes: 20
|
||||||
@@ -4508,7 +4508,7 @@ steps:
|
|||||||
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
||||||
- pip install -e ./plugins/vllm_add_dummy_model
|
- pip install -e ./plugins/vllm_add_dummy_model
|
||||||
- pytest -v -s distributed/test_distributed_oot.py
|
- pytest -v -s distributed/test_distributed_oot.py
|
||||||
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
|
- pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ steps:
|
|||||||
- tests/entrypoints/test_chat_utils
|
- tests/entrypoints/test_chat_utils
|
||||||
commands:
|
commands:
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||||
- pytest -v -s entrypoints/test_chat_utils.py
|
- pytest -v -s entrypoints/test_chat_utils.py
|
||||||
mirror:
|
mirror:
|
||||||
amd:
|
amd:
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ steps:
|
|||||||
- vllm/config/model.py
|
- vllm/config/model.py
|
||||||
- vllm/model_executor
|
- vllm/model_executor
|
||||||
- tests/model_executor
|
- tests/model_executor
|
||||||
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
commands:
|
commands:
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s model_executor
|
- pytest -v -s model_executor
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
|
|||||||
@@ -36,6 +36,6 @@ steps:
|
|||||||
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
- pytest -v -s plugins_tests/test_scheduler_plugins.py
|
||||||
- pip install -e ./plugins/vllm_add_dummy_model
|
- pip install -e ./plugins/vllm_add_dummy_model
|
||||||
- pytest -v -s distributed/test_distributed_oot.py
|
- pytest -v -s distributed/test_distributed_oot.py
|
||||||
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
|
- pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
- pytest -v -s models/test_oot_registration.py # it needs a clean process
|
||||||
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
||||||
|
|||||||
2
.github/mergify.yml
vendored
2
.github/mergify.yml
vendored
@@ -381,7 +381,7 @@ pull_request_rules:
|
|||||||
- or:
|
- or:
|
||||||
- files~=^vllm/model_executor/model_loader/tensorizer.py
|
- files~=^vllm/model_executor/model_loader/tensorizer.py
|
||||||
- files~=^vllm/model_executor/model_loader/tensorizer_loader.py
|
- files~=^vllm/model_executor/model_loader/tensorizer_loader.py
|
||||||
- files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- files~=^tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||||
- files~=^tests/model_executor/model_loader/tensorizer_loader/
|
- files~=^tests/model_executor/model_loader/tensorizer_loader/
|
||||||
actions:
|
actions:
|
||||||
assign:
|
assign:
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ av==16.1.0
|
|||||||
blobfile==3.0.0
|
blobfile==3.0.0
|
||||||
# Multi-Modal Models Test
|
# Multi-Modal Models Test
|
||||||
decord==0.6.0
|
decord==0.6.0
|
||||||
# video processing, required by entrypoints/openai/test_video.py
|
# video processing, required by entrypoints/openai/chat_completion/test_video.py
|
||||||
rapidfuzz==3.12.1
|
rapidfuzz==3.12.1
|
||||||
|
|
||||||
# OpenAI compatibility and testing
|
# OpenAI compatibility and testing
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
from ..entrypoints.openai.test_oot_registration import run_and_test_dummy_opt_api_server
|
from tests.entrypoints.openai.chat_completion.test_oot_registration import (
|
||||||
|
run_and_test_dummy_opt_api_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_distributed_oot(dummy_opt_path: str):
|
def test_distributed_oot(dummy_opt_path: str):
|
||||||
|
|||||||
@@ -4,12 +4,11 @@ import weakref
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS
|
||||||
from vllm import LLM
|
from vllm import LLM
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
|
|
||||||
from ..openai.test_vision import TEST_IMAGE_ASSETS
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def text_llm():
|
def text_llm():
|
||||||
|
|||||||
@@ -6,13 +6,12 @@ import logging
|
|||||||
import pytest
|
import pytest
|
||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
|
from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS
|
||||||
from vllm import LLM
|
from vllm import LLM
|
||||||
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
||||||
from vllm.v1.metrics import loggers as stat_loggers
|
from vllm.v1.metrics import loggers as stat_loggers
|
||||||
from vllm.v1.metrics.reader import Counter, Metric
|
from vllm.v1.metrics.reader import Counter, Metric
|
||||||
|
|
||||||
from ..openai.test_vision import TEST_IMAGE_ASSETS
|
|
||||||
|
|
||||||
|
|
||||||
def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]:
|
def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]:
|
||||||
return [
|
return [
|
||||||
|
|||||||
@@ -7,11 +7,10 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.assets.audio import AudioAsset
|
from vllm.assets.audio import AudioAsset
|
||||||
from vllm.multimodal.utils import encode_audio_base64, encode_audio_url, fetch_audio
|
from vllm.multimodal.utils import encode_audio_base64, encode_audio_url, fetch_audio
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
|
MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
|
||||||
TEST_AUDIO_URLS = [
|
TEST_AUDIO_URLS = [
|
||||||
AudioAsset("winning_call").url,
|
AudioAsset("winning_call").url,
|
||||||
@@ -8,8 +8,8 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
||||||
from ...conftest import VideoTestAssets
|
from tests.conftest import VideoTestAssets
|
||||||
from ...utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
|
|
||||||
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"
|
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"
|
||||||
|
|
||||||
@@ -8,8 +8,8 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
from ...conftest import AudioTestAssets
|
from tests.conftest import AudioTestAssets
|
||||||
from ...utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
|
|
||||||
# NOTE - the tests in this module are currently analogous to test_chat, but are
|
# NOTE - the tests in this module are currently analogous to test_chat, but are
|
||||||
# separated to avoid OOM killing due to module-scoped servers, since we
|
# separated to avoid OOM killing due to module-scoped servers, since we
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
from ...utils import VLLM_PATH, RemoteOpenAIServer
|
from tests.utils import VLLM_PATH, RemoteOpenAIServer
|
||||||
|
|
||||||
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
|
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
|
||||||
assert chatml_jinja_path.exists()
|
assert chatml_jinja_path.exists()
|
||||||
@@ -8,7 +8,7 @@ from typing import Any, NamedTuple
|
|||||||
import openai # use the official client for correctness check
|
import openai # use the official client for correctness check
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
|
|
||||||
# # any model with a chat template should work here
|
# # any model with a chat template should work here
|
||||||
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
|
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
|
||||||
@@ -7,11 +7,10 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.multimodal.utils import encode_video_url, fetch_video
|
from vllm.multimodal.utils import encode_video_url, fetch_video
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
|
MODEL_NAME = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
|
||||||
MAXIMUM_VIDEOS = 3
|
MAXIMUM_VIDEOS = 3
|
||||||
|
|
||||||
@@ -8,12 +8,11 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from transformers import AutoProcessor
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
|
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
|
||||||
from vllm.multimodal.media import MediaWithBytes
|
from vllm.multimodal.media import MediaWithBytes
|
||||||
from vllm.multimodal.utils import encode_image_url, fetch_image
|
from vllm.multimodal.utils import encode_image_url, fetch_image
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from ...utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
|
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
|
||||||
MAXIMUM_IMAGES = 2
|
MAXIMUM_IMAGES = 2
|
||||||
|
|
||||||
@@ -8,10 +8,9 @@ import pytest
|
|||||||
import requests
|
import requests
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.utils.serial_utils import tensor2base64
|
from vllm.utils.serial_utils import tensor2base64
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
|
"model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
|
||||||
0
tests/entrypoints/openai/completion/__init__.py
Normal file
0
tests/entrypoints/openai/completion/__init__.py
Normal file
@@ -14,7 +14,7 @@ import torch
|
|||||||
from openai import BadRequestError
|
from openai import BadRequestError
|
||||||
from transformers import AutoConfig
|
from transformers import AutoConfig
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
|
|
||||||
# any model with a chat template should work here
|
# any model with a chat template should work here
|
||||||
MODEL_NAME = "facebook/opt-125m"
|
MODEL_NAME = "facebook/opt-125m"
|
||||||
@@ -11,11 +11,10 @@ import pytest
|
|||||||
import regex as re
|
import regex as re
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.config import ModelConfig
|
from vllm.config import ModelConfig
|
||||||
from vllm.renderers.embed_utils import safe_load_prompt_embeds
|
from vllm.renderers.embed_utils import safe_load_prompt_embeds
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_empty_prompt():
|
async def test_empty_prompt():
|
||||||
@@ -9,6 +9,7 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
import torch.cuda
|
import torch.cuda
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
from vllm.model_executor.model_loader.tensorizer import (
|
from vllm.model_executor.model_loader.tensorizer import (
|
||||||
TensorizerConfig,
|
TensorizerConfig,
|
||||||
@@ -17,8 +18,6 @@ from vllm.model_executor.model_loader.tensorizer import (
|
|||||||
)
|
)
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "unsloth/llama-3.2-1b-Instruct"
|
MODEL_NAME = "unsloth/llama-3.2-1b-Instruct"
|
||||||
LORA_PATH = "davzoku/finqa_adapter_1b"
|
LORA_PATH = "davzoku/finqa_adapter_1b"
|
||||||
|
|
||||||
@@ -6,11 +6,10 @@ import tempfile
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
|
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
|
||||||
from vllm.tokenizers import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
|
||||||
|
|
||||||
MODEL_NAME = "Qwen/Qwen3-0.6B"
|
MODEL_NAME = "Qwen/Qwen3-0.6B"
|
||||||
MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")
|
MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")
|
||||||
|
|
||||||
Reference in New Issue
Block a user