[Refactor] Relocate completion and chat completion tests (#37125)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-16 23:31:23 -04:00
committed by GitHub
parent f04d5226f8
commit 384dc7f77b
26 changed files with 41 additions and 48 deletions

View File

@@ -333,15 +333,15 @@ apply_rocm_test_overrides() {
# --- Entrypoint ignores --- # --- Entrypoint ignores ---
if [[ $cmds == *" entrypoints/openai "* ]]; then if [[ $cmds == *" entrypoints/openai "* ]]; then
cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \ cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \
--ignore=entrypoints/openai/test_audio.py \ --ignore=entrypoints/openai/chat_completion/test_audio.py \
--ignore=entrypoints/openai/test_shutdown.py \ --ignore=entrypoints/openai/completion/test_shutdown.py \
--ignore=entrypoints/openai/test_completion.py \ --ignore=entrypoints/openai/test_completion.py \
--ignore=entrypoints/openai/test_models.py \ --ignore=entrypoints/openai/test_models.py \
--ignore=entrypoints/openai/test_lora_adapters.py \ --ignore=entrypoints/openai/test_lora_adapters.py \
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \ --ignore=entrypoints/openai/test_return_tokens_as_ids.py \
--ignore=entrypoints/openai/test_root_path.py \ --ignore=entrypoints/openai/chat_completion/test_root_path.py \
--ignore=entrypoints/openai/test_tokenization.py \ --ignore=entrypoints/openai/test_tokenization.py \
--ignore=entrypoints/openai/test_prompt_validation.py "} --ignore=entrypoints/openai/completion/test_prompt_validation.py "}
fi fi
if [[ $cmds == *" entrypoints/llm "* ]]; then if [[ $cmds == *" entrypoints/llm "* ]]; then

View File

@@ -162,7 +162,7 @@ steps:
- tests/entrypoints/test_chat_utils - tests/entrypoints/test_chat_utils
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py - pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2) - label: Entrypoints Integration Test (API Server 2)
@@ -674,12 +674,12 @@ steps:
- vllm/config/model.py - vllm/config/model.py
- vllm/model_executor - vllm/model_executor
- tests/model_executor - tests/model_executor
- tests/entrypoints/openai/test_tensorizer_entrypoint.py - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
commands: commands:
- apt-get update && apt-get install -y curl libsodium23 - apt-get update && apt-get install -y curl libsodium23
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s model_executor - pytest -v -s model_executor
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
- label: Benchmarks # 11min - label: Benchmarks # 11min
timeout_in_minutes: 20 timeout_in_minutes: 20
@@ -1143,7 +1143,7 @@ steps:
- pytest -v -s plugins_tests/test_scheduler_plugins.py - pytest -v -s plugins_tests/test_scheduler_plugins.py
- pip install -e ./plugins/vllm_add_dummy_model - pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py - pytest -v -s distributed/test_distributed_oot.py
- pytest -v -s entrypoints/openai/test_oot_registration.py - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py
- pytest -v -s models/test_oot_registration.py - pytest -v -s models/test_oot_registration.py
- pytest -v -s plugins/lora_resolvers - pytest -v -s plugins/lora_resolvers
@@ -1502,7 +1502,7 @@ steps:
- tests/entrypoints/test_chat_utils - tests/entrypoints/test_chat_utils
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py - pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2) - label: Entrypoints Integration Test (API Server 2)
@@ -2133,12 +2133,12 @@ steps:
- vllm/config/model.py - vllm/config/model.py
- vllm/model_executor - vllm/model_executor
- tests/model_executor - tests/model_executor
- tests/entrypoints/openai/test_tensorizer_entrypoint.py - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
commands: commands:
- apt-get update && apt-get install -y curl libsodium23 - apt-get update && apt-get install -y curl libsodium23
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s model_executor - pytest -v -s model_executor
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
- label: Benchmarks # 11min - label: Benchmarks # 11min
timeout_in_minutes: 20 timeout_in_minutes: 20
@@ -2735,7 +2735,7 @@ steps:
- pytest -v -s plugins_tests/test_scheduler_plugins.py - pytest -v -s plugins_tests/test_scheduler_plugins.py
- pip install -e ./plugins/vllm_add_dummy_model - pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py - pytest -v -s distributed/test_distributed_oot.py
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
- pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
@@ -3257,7 +3257,7 @@ steps:
- tests/entrypoints/test_chat_utils - tests/entrypoints/test_chat_utils
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py - pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2) - label: Entrypoints Integration Test (API Server 2)
@@ -3872,12 +3872,12 @@ steps:
- vllm/config/model.py - vllm/config/model.py
- vllm/model_executor - vllm/model_executor
- tests/model_executor - tests/model_executor
- tests/entrypoints/openai/test_tensorizer_entrypoint.py - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
commands: commands:
- apt-get update && apt-get install -y curl libsodium23 - apt-get update && apt-get install -y curl libsodium23
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s model_executor - pytest -v -s model_executor
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
- label: Benchmarks # 11min - label: Benchmarks # 11min
timeout_in_minutes: 20 timeout_in_minutes: 20
@@ -4508,7 +4508,7 @@ steps:
- pytest -v -s plugins_tests/test_scheduler_plugins.py - pytest -v -s plugins_tests/test_scheduler_plugins.py
- pip install -e ./plugins/vllm_add_dummy_model - pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py - pytest -v -s distributed/test_distributed_oot.py
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
- pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins

View File

@@ -34,7 +34,7 @@ steps:
- tests/entrypoints/test_chat_utils - tests/entrypoints/test_chat_utils
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py - pytest -v -s entrypoints/test_chat_utils.py
mirror: mirror:
amd: amd:

View File

@@ -9,9 +9,9 @@ steps:
- vllm/config/model.py - vllm/config/model.py
- vllm/model_executor - vllm/model_executor
- tests/model_executor - tests/model_executor
- tests/entrypoints/openai/test_tensorizer_entrypoint.py - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
commands: commands:
- apt-get update && apt-get install -y curl libsodium23 - apt-get update && apt-get install -y curl libsodium23
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s model_executor - pytest -v -s model_executor
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py

View File

@@ -36,6 +36,6 @@ steps:
- pytest -v -s plugins_tests/test_scheduler_plugins.py - pytest -v -s plugins_tests/test_scheduler_plugins.py
- pip install -e ./plugins/vllm_add_dummy_model - pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py - pytest -v -s distributed/test_distributed_oot.py
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py # it needs a clean process
- pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins

2
.github/mergify.yml vendored
View File

@@ -381,7 +381,7 @@ pull_request_rules:
- or: - or:
- files~=^vllm/model_executor/model_loader/tensorizer.py - files~=^vllm/model_executor/model_loader/tensorizer.py
- files~=^vllm/model_executor/model_loader/tensorizer_loader.py - files~=^vllm/model_executor/model_loader/tensorizer_loader.py
- files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py - files~=^tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
- files~=^tests/model_executor/model_loader/tensorizer_loader/ - files~=^tests/model_executor/model_loader/tensorizer_loader/
actions: actions:
assign: assign:

View File

@@ -50,7 +50,7 @@ av==16.1.0
blobfile==3.0.0 blobfile==3.0.0
# Multi-Modal Models Test # Multi-Modal Models Test
decord==0.6.0 decord==0.6.0
# video processing, required by entrypoints/openai/test_video.py # video processing, required by entrypoints/openai/chat_completion/test_video.py
rapidfuzz==3.12.1 rapidfuzz==3.12.1
# OpenAI compatibility and testing # OpenAI compatibility and testing

View File

@@ -1,7 +1,9 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from ..entrypoints.openai.test_oot_registration import run_and_test_dummy_opt_api_server from tests.entrypoints.openai.chat_completion.test_oot_registration import (
run_and_test_dummy_opt_api_server,
)
def test_distributed_oot(dummy_opt_path: str): def test_distributed_oot(dummy_opt_path: str):

View File

@@ -4,12 +4,11 @@ import weakref
import pytest import pytest
from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS
from vllm import LLM from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory from vllm.distributed import cleanup_dist_env_and_memory
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from ..openai.test_vision import TEST_IMAGE_ASSETS
@pytest.fixture(scope="function") @pytest.fixture(scope="function")
def text_llm(): def text_llm():

View File

@@ -6,13 +6,12 @@ import logging
import pytest import pytest
import regex as re import regex as re
from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS
from vllm import LLM from vllm import LLM
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.v1.metrics import loggers as stat_loggers from vllm.v1.metrics import loggers as stat_loggers
from vllm.v1.metrics.reader import Counter, Metric from vllm.v1.metrics.reader import Counter, Metric
from ..openai.test_vision import TEST_IMAGE_ASSETS
def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]: def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]:
return [ return [

View File

@@ -7,11 +7,10 @@ import openai
import pytest import pytest
import pytest_asyncio import pytest_asyncio
from tests.utils import RemoteOpenAIServer
from vllm.assets.audio import AudioAsset from vllm.assets.audio import AudioAsset
from vllm.multimodal.utils import encode_audio_base64, encode_audio_url, fetch_audio from vllm.multimodal.utils import encode_audio_base64, encode_audio_url, fetch_audio
from ...utils import RemoteOpenAIServer
MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b" MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
TEST_AUDIO_URLS = [ TEST_AUDIO_URLS = [
AudioAsset("winning_call").url, AudioAsset("winning_call").url,

View File

@@ -8,8 +8,8 @@ import openai
import pytest import pytest
import pytest_asyncio import pytest_asyncio
from ...conftest import VideoTestAssets from tests.conftest import VideoTestAssets
from ...utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B" MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"

View File

@@ -8,8 +8,8 @@ import pytest
import pytest_asyncio import pytest_asyncio
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from ...conftest import AudioTestAssets from tests.conftest import AudioTestAssets
from ...utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
# NOTE - the tests in this module are currently analogous to test_chat, but are # NOTE - the tests in this module are currently analogous to test_chat, but are
# separated to avoid OOM killing due to module-scoped servers, since we # separated to avoid OOM killing due to module-scoped servers, since we

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from ...utils import VLLM_PATH, RemoteOpenAIServer from tests.utils import VLLM_PATH, RemoteOpenAIServer
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja" chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists() assert chatml_jinja_path.exists()

View File

@@ -8,7 +8,7 @@ from typing import Any, NamedTuple
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
from ...utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
# # any model with a chat template should work here # # any model with a chat template should work here
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct" MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"

View File

@@ -7,11 +7,10 @@ import openai
import pytest import pytest
import pytest_asyncio import pytest_asyncio
from tests.utils import RemoteOpenAIServer
from vllm.multimodal.utils import encode_video_url, fetch_video from vllm.multimodal.utils import encode_video_url, fetch_video
from vllm.platforms import current_platform from vllm.platforms import current_platform
from ...utils import RemoteOpenAIServer
MODEL_NAME = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" MODEL_NAME = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
MAXIMUM_VIDEOS = 3 MAXIMUM_VIDEOS = 3

View File

@@ -8,12 +8,11 @@ import pytest
import pytest_asyncio import pytest_asyncio
from transformers import AutoProcessor from transformers import AutoProcessor
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
from vllm.multimodal.media import MediaWithBytes from vllm.multimodal.media import MediaWithBytes
from vllm.multimodal.utils import encode_image_url, fetch_image from vllm.multimodal.utils import encode_image_url, fetch_image
from vllm.platforms import current_platform from vllm.platforms import current_platform
from ...utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct" MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
MAXIMUM_IMAGES = 2 MAXIMUM_IMAGES = 2

View File

@@ -8,10 +8,9 @@ import pytest
import requests import requests
import torch import torch
from tests.utils import RemoteOpenAIServer
from vllm.utils.serial_utils import tensor2base64 from vllm.utils.serial_utils import tensor2base64
from ...utils import RemoteOpenAIServer
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] "model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]

View File

@@ -14,7 +14,7 @@ import torch
from openai import BadRequestError from openai import BadRequestError
from transformers import AutoConfig from transformers import AutoConfig
from ...utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "facebook/opt-125m" MODEL_NAME = "facebook/opt-125m"

View File

@@ -11,11 +11,10 @@ import pytest
import regex as re import regex as re
import torch import torch
from tests.utils import RemoteOpenAIServer
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.renderers.embed_utils import safe_load_prompt_embeds from vllm.renderers.embed_utils import safe_load_prompt_embeds
from ...utils import RemoteOpenAIServer
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_empty_prompt(): async def test_empty_prompt():

View File

@@ -9,6 +9,7 @@ import pytest
import pytest_asyncio import pytest_asyncio
import torch.cuda import torch.cuda
from tests.utils import RemoteOpenAIServer
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.model_executor.model_loader.tensorizer import ( from vllm.model_executor.model_loader.tensorizer import (
TensorizerConfig, TensorizerConfig,
@@ -17,8 +18,6 @@ from vllm.model_executor.model_loader.tensorizer import (
) )
from vllm.platforms import current_platform from vllm.platforms import current_platform
from ...utils import RemoteOpenAIServer
MODEL_NAME = "unsloth/llama-3.2-1b-Instruct" MODEL_NAME = "unsloth/llama-3.2-1b-Instruct"
LORA_PATH = "davzoku/finqa_adapter_1b" LORA_PATH = "davzoku/finqa_adapter_1b"

View File

@@ -6,11 +6,10 @@ import tempfile
import pytest import pytest
from tests.utils import RemoteOpenAIServer
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...utils import RemoteOpenAIServer
MODEL_NAME = "Qwen/Qwen3-0.6B" MODEL_NAME = "Qwen/Qwen3-0.6B"
MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b") MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")