2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-06-03 11:20:17 -07:00
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-02-02 14:58:18 -05:00
|
|
|
|
2024-01-24 00:26:37 +01:00
|
|
|
import tempfile
|
|
|
|
|
from collections import OrderedDict
|
2025-08-22 17:56:51 +08:00
|
|
|
from unittest.mock import MagicMock
|
2024-01-24 00:26:37 +01:00
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
import torch
|
|
|
|
|
import torch.nn as nn
|
|
|
|
|
from huggingface_hub import snapshot_download
|
|
|
|
|
|
2024-10-18 14:30:55 -07:00
|
|
|
from vllm.distributed import (
|
|
|
|
|
cleanup_dist_env_and_memory,
|
2024-06-12 17:27:08 -07:00
|
|
|
init_distributed_environment,
|
|
|
|
|
initialize_model_parallel,
|
|
|
|
|
)
|
2024-01-24 00:26:37 +01:00
|
|
|
from vllm.model_executor.layers.linear import (
|
|
|
|
|
ColumnParallelLinear,
|
|
|
|
|
MergedColumnParallelLinear,
|
|
|
|
|
RowParallelLinear,
|
|
|
|
|
)
|
2024-03-25 23:59:47 +09:00
|
|
|
from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
2024-01-24 00:26:37 +01:00
|
|
|
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
|
2025-02-22 16:21:30 +08:00
|
|
|
from vllm.model_executor.models.interfaces import SupportsLoRA
|
2025-01-12 13:01:52 +00:00
|
|
|
from vllm.platforms import current_platform
|
2024-01-24 00:26:37 +01:00
|
|
|
|
2024-06-15 12:45:31 +08:00
|
|
|
|
2024-06-06 19:07:57 -07:00
|
|
|
@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
    """Allow subdirectories to skip global cleanup by overriding this fixture.

    This can provide a ~10x speedup for non-GPU unit tests since they don't need
    to initialize torch.
    """
    # Cleanup runs unless the test (or its module) carries the
    # `skip_global_cleanup` marker.
    marker = request.node.get_closest_marker("skip_global_cleanup")
    return marker is None
|
2024-06-06 19:07:57 -07:00
|
|
|
|
|
|
|
|
|
2024-01-24 00:26:37 +01:00
|
|
|
@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
    """Autouse teardown: after each test, tear down the distributed
    environment, free memory, and shut down Ray — unless the test opted out.
    """
    yield
    if not should_do_global_cleanup_after_test:
        return
    cleanup_dist_env_and_memory(shutdown_ray=True)
|
2024-01-24 00:26:37 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def dist_init():
    """Set up a 1-process distributed environment for a test, then clean up.

    Initializes vLLM's distributed state and model parallelism (TP=1, PP=1)
    inside an active vLLM config context, yields to the test, and finally
    tears down the distributed env/memory and shuts down Ray.
    """
    # Imported lazily so merely collecting this conftest does not require it.
    from tests.utils import ensure_current_vllm_config

    # File-based rendezvous avoids the need for a free TCP port.
    temp_file = tempfile.mkstemp()[1]

    backend = "nccl"
    if current_platform.is_cpu() or current_platform.is_tpu():
        # nccl needs CUDA devices; use gloo on CPU/TPU platforms.
        backend = "gloo"

    with ensure_current_vllm_config():
        init_distributed_environment(
            world_size=1,
            rank=0,
            distributed_init_method=f"file://{temp_file}",
            local_rank=0,
            backend=backend,
        )
        initialize_model_parallel(1, 1)
        # Test body runs here, with the config context still active.
        yield
    cleanup_dist_env_and_memory(shutdown_ray=True)
|
2024-01-24 00:26:37 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def dist_init_torch_only():
    """Initialize a bare torch.distributed process group (world size 1).

    Unlike `dist_init`, this touches only torch's process group, not vLLM's
    distributed state. No-op if a process group already exists.
    """
    if torch.distributed.is_initialized():
        return
    # nccl requires GPUs; fall back to gloo on CPU platforms.
    backend = "gloo" if current_platform.is_cpu() else "nccl"

    # File-based rendezvous avoids the need for a free TCP port.
    rendezvous_file = tempfile.mkstemp()[1]
    torch.distributed.init_process_group(
        backend=backend,
        init_method=f"file://{rendezvous_file}",
        world_size=1,
        rank=0,
    )
|
2024-01-24 00:26:37 +01:00
|
|
|
|
|
|
|
|
|
2025-02-22 16:21:30 +08:00
|
|
|
class DummyLoRAModel(nn.Sequential, SupportsLoRA):
    """Minimal test model: an ``nn.Sequential`` that also implements the
    ``SupportsLoRA`` interface so vLLM's LoRA machinery will accept it."""

    pass
|
|
|
|
|
|
|
|
|
|
|
2024-01-24 00:26:37 +01:00
|
|
|
@pytest.fixture
def dummy_model(default_vllm_config) -> nn.Module:
    """Build a small LoRA-capable model mixing column/row-parallel linear
    layers, a nested sub-module, and an lm_head + logits processor."""
    model = DummyLoRAModel(
        OrderedDict(
            [
                ("dense1", ColumnParallelLinear(764, 100)),
                ("dense2", RowParallelLinear(100, 50)),
                (
                    "layer1",
                    nn.Sequential(
                        OrderedDict(
                            [
                                ("dense1", ColumnParallelLinear(100, 10)),
                                ("dense2", RowParallelLinear(10, 50)),
                            ]
                        )
                    ),
                ),
                ("act2", nn.ReLU()),
                ("output", ColumnParallelLinear(50, 10)),
                ("outact", nn.Sigmoid()),
                # Special handling for lm_head & sampler
                ("lm_head", ParallelLMHead(32064, 10)),
                ("logits_processor", LogitsProcessor(32064)),
            ]
        )
    )
    # A mock config is enough for the attributes LoRA code may poke at.
    model.config = MagicMock()
    # Tell the LoRA machinery which modules are embedding-like.
    model.embedding_modules = {"lm_head": "lm_head"}
    model.unpadded_vocab_size = 32064
    return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def dummy_model_gate_up(default_vllm_config) -> nn.Module:
    """Like ``dummy_model`` but with a merged ``gate_up_proj`` layer, plus a
    ``packed_modules_mapping`` so packed-LoRA handling can be exercised."""
    model = DummyLoRAModel(
        OrderedDict(
            [
                ("dense1", ColumnParallelLinear(764, 100)),
                ("dense2", RowParallelLinear(100, 50)),
                (
                    "layer1",
                    nn.Sequential(
                        OrderedDict(
                            [
                                ("dense1", ColumnParallelLinear(100, 10)),
                                ("dense2", RowParallelLinear(10, 50)),
                            ]
                        )
                    ),
                ),
                ("act2", nn.ReLU()),
                ("gate_up_proj", MergedColumnParallelLinear(50, [5, 5])),
                ("outact", nn.Sigmoid()),
                # Special handling for lm_head & sampler
                ("lm_head", ParallelLMHead(32064, 10)),
                ("logits_processor", LogitsProcessor(32064)),
            ]
        )
    )
    # A mock config is enough for the attributes LoRA code may poke at.
    model.config = MagicMock()
    # Map the merged module to the per-projection names LoRA adapters use.
    model.packed_modules_mapping = {
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
        ],
    }
    # Tell the LoRA machinery which modules are embedding-like.
    model.embedding_modules = {"lm_head": "lm_head"}
    model.unpadded_vocab_size = 32064

    return model
|
|
|
|
|
|
|
|
|
|
|
2024-02-13 15:55:45 -08:00
|
|
|
@pytest.fixture(scope="session")
def mixtral_lora_files():
    """Download the Mixtral LoRA adapter once per session.

    Note: this module has incorrect adapter_config.json to test
    https://github.com/vllm-project/vllm/pull/5909/files.
    """
    repo = "SangBinCho/mixtral-lora"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def chatglm3_lora_files():
    """Download the ChatGLM3 text2sql LoRA adapter once per session."""
    repo = "jeeejeee/chatglm3-text2sql-spider"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def baichuan_lora_files():
    """Download the Baichuan-7B text2sql LoRA adapter once per session."""
    repo = "jeeejeee/baichuan7b-text2sql-spider"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
2024-04-19 15:59:54 +08:00
|
|
|
@pytest.fixture(scope="session")
def baichuan_zero_lora_files():
    """Download a Baichuan adapter whose lora_B weights are all zero."""
    repo = "jeeejeee/baichuan7b-zero-init"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def baichuan_regex_lora_files():
    """Download a Baichuan adapter that uses regex target modules."""
    repo = "jeeejeee/baichuan-7b-lora-zero-regex"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def ilama_lora_files():
    """Download the iLlama text2sql LoRA adapter once per session."""
    repo = "jeeejeee/ilama-text2sql-spider"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
2024-09-29 14:59:45 +08:00
|
|
|
@pytest.fixture(scope="session")
def minicpmv_lora_files():
    """Download the MiniCPM-V 2.5 Pokemon LoRA adapter once per session."""
    repo = "jeeejeee/minicpmv25-lora-pokemon"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen2vl_lora_files():
    """Download the Qwen2-VL Pokemon LoRA adapter once per session."""
    repo = "jeeejeee/qwen2-vl-lora-pokemon"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen25vl_base_huggingface_id():
    """Base model id paired with the Qwen2.5-VL LoRA adapter fixtures."""
    model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
    return model_id


@pytest.fixture(scope="session")
def qwen25vl_lora_files():
    """Download the Qwen2.5-VL Pokemon LoRA adapter once per session."""
    repo = "jeeejeee/qwen25-vl-lora-pokemon"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
2025-12-26 20:48:20 +08:00
|
|
|
@pytest.fixture(scope="session")
def qwen2vl_language_lora_files():
    """Download a Qwen2-VL adapter targeting only the language model."""
    repo = "prashanth058/qwen2vl-flickr-lora-language"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen2vl_vision_tower_connector_lora_files():
    """Download a Qwen2-VL adapter targeting the vision tower + connector."""
    repo = "prashanth058/qwen2vl-flickr-lora-tower-connector"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen2vl_vision_tower_lora_files():
    """Download a Qwen2-VL adapter targeting only the vision tower."""
    repo = "prashanth058/qwen2vl-flickr-lora-tower"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen25vl_vision_lora_files():
    """Download a Qwen2.5-VL adapter targeting vision + connector modules."""
    repo = "EpochEcho/qwen2.5-3b-vl-lora-vision-connector"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen3vl_vision_lora_files():
    """Download a Qwen3-VL adapter targeting vision + connector modules."""
    repo = "EpochEcho/qwen3-4b-vl-lora-vision-connector"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
2026-01-19 18:15:20 -08:00
|
|
|
@pytest.fixture(scope="session")
def qwen3_meowing_lora_files():
    """Download Qwen3 Meow LoRA files once per test session."""
    repo = "Jackmin108/Qwen3-0.6B-Meow-LoRA"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen3_woofing_lora_files():
    """Download Qwen3 Woof LoRA files once per test session."""
    repo = "Jackmin108/Qwen3-0.6B-Woof-LoRA"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def tinyllama_lora_files():
    """Download the TinyLlama colorist LoRA adapter once per session."""
    repo = "jashing/tinyllama-colorist-lora"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
2025-10-21 11:01:37 +08:00
|
|
|
@pytest.fixture(scope="session")
def deepseekv2_lora_files():
    """Download the DeepSeek-V2-Lite-Chat LoRA adapter once per session."""
    repo = "wuchen01/DeepSeek-V2-Lite-Chat-All-LoRA"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def gptoss20b_lora_files():
    """Download the GPT-OSS-20B text2sql LoRA adapter once per session."""
    repo = "jeeejeee/gpt-oss-20b-lora-adapter-text2sql"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen3moe_lora_files():
    """Download the Qwen3-MoE text2sql LoRA adapter once per session."""
    repo = "jeeejeee/qwen3-moe-text2sql-spider"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def olmoe_lora_files():
    """Download the OLMoE text2sql LoRA adapter once per session."""
    repo = "jeeejeee/olmoe-instruct-text2sql-spider"
    return snapshot_download(repo_id=repo)


@pytest.fixture(scope="session")
def qwen3_lora_files():
    """Download the Qwen3 self-cognition LoRA adapter once per session."""
    repo = "charent/self_cognition_Alice"
    return snapshot_download(repo_id=repo)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="session")
def llama32_lora_huggingface_id():
    """Hugging Face repo id used to test LoRA runtime downloading."""
    repo = "jeeejeee/llama32-3b-text2sql-spider"
    return repo


@pytest.fixture(scope="session")
def llama32_lora_files(llama32_lora_huggingface_id):
    """Local snapshot of the Llama-3.2 LoRA adapter (downloads once)."""
    return snapshot_download(repo_id=llama32_lora_huggingface_id)
|
2025-11-21 09:46:43 +08:00
|
|
|
|
|
|
|
|
|
2025-04-09 09:13:56 +08:00
|
|
|
@pytest.fixture
def reset_default_device():
    """
    Some tests, such as `test_punica_ops.py`, explicitly set the
    default device, which can affect subsequent tests. Adding this fixture
    helps avoid this problem.
    """
    original_device = torch.get_default_device()
    # try/finally so the default device is restored even when the test body
    # raises: pytest re-raises the test's exception into the generator at
    # `yield`, which would otherwise skip the restore and leak device state
    # into every subsequent test.
    try:
        yield
    finally:
        torch.set_default_device(original_device)
|