[LoRA] Cleanup LoRA unused code (#29611)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Jee Jee Li
2025-11-29 14:52:58 +08:00
committed by GitHub
parent 4a80ad0a25
commit 39e63dec7c
46 changed files with 126 additions and 173 deletions

View File

@@ -13,9 +13,8 @@ import pytest_asyncio
from ...utils import RemoteOpenAIServer
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
MODEL_NAME = "Qwen/Qwen3-0.6B"
BADREQUEST_CASES = [
(
@@ -33,11 +32,11 @@ BADREQUEST_CASES = [
@pytest.fixture(scope="module", params=[True])
def server_with_lora_modules_json(request, zephyr_lora_files):
def server_with_lora_modules_json(request, qwen3_lora_files):
# Define the json format LoRA module configurations
lora_module_1 = {
"name": "zephyr-lora",
"path": zephyr_lora_files,
"name": "qwen3-lora",
"path": qwen3_lora_files,
"base_model_name": MODEL_NAME,
}
@@ -74,7 +73,7 @@ async def client(server_with_lora_modules_json):
@pytest.mark.asyncio
async def test_static_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_files):
async def test_static_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files):
models = await client.models.list()
models = models.data
served_model = models[0]
@@ -82,17 +81,17 @@ async def test_static_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_files
assert served_model.id == MODEL_NAME
assert served_model.root == MODEL_NAME
assert served_model.parent is None
assert all(lora_model.root == zephyr_lora_files for lora_model in lora_models)
assert all(lora_model.root == qwen3_lora_files for lora_model in lora_models)
assert all(lora_model.parent == MODEL_NAME for lora_model in lora_models)
assert lora_models[0].id == "zephyr-lora"
assert lora_models[0].id == "qwen3-lora"
@pytest.mark.asyncio
async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_files):
async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files):
response = await client.post(
"load_lora_adapter",
cast_to=str,
body={"lora_name": "zephyr-lora-3", "lora_path": zephyr_lora_files},
body={"lora_name": "qwen3-lora-3", "lora_path": qwen3_lora_files},
)
# Ensure adapter loads before querying /models
assert "success" in response
@@ -100,9 +99,9 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, zephyr_lora_file
models = await client.models.list()
models = models.data
dynamic_lora_model = models[-1]
assert dynamic_lora_model.root == zephyr_lora_files
assert dynamic_lora_model.root == qwen3_lora_files
assert dynamic_lora_model.parent == MODEL_NAME
assert dynamic_lora_model.id == "zephyr-lora-3"
assert dynamic_lora_model.id == "qwen3-lora-3"
@pytest.mark.asyncio
@@ -134,7 +133,7 @@ async def test_dynamic_lora_invalid_files(client: openai.AsyncOpenAI, tmp_path):
async def test_dynamic_lora_badrequests(
client: openai.AsyncOpenAI,
tmp_path,
zephyr_lora_files,
qwen3_lora_files,
test_name: str,
config_change: dict,
expected_error: str,
@@ -143,7 +142,7 @@ async def test_dynamic_lora_badrequests(
test_dir = tmp_path / test_name
# Copy adapter files
shutil.copytree(zephyr_lora_files, test_dir)
shutil.copytree(qwen3_lora_files, test_dir)
# Load and modify configuration
config_path = test_dir / "adapter_config.json"
@@ -167,7 +166,7 @@ async def test_dynamic_lora_badrequests(
@pytest.mark.asyncio
async def test_multiple_lora_adapters(
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files
client: openai.AsyncOpenAI, tmp_path, qwen3_lora_files
):
"""Validate that many loras can be dynamically registered and inferenced
with concurrently"""
@@ -178,7 +177,7 @@ async def test_multiple_lora_adapters(
await client.post(
"load_lora_adapter",
cast_to=str,
body={"lora_name": adapter_name, "lora_path": str(zephyr_lora_files)},
body={"lora_name": adapter_name, "lora_path": str(qwen3_lora_files)},
)
for _ in range(3):
await client.completions.create(
@@ -199,7 +198,7 @@ async def test_multiple_lora_adapters(
@pytest.mark.asyncio
async def test_loading_invalid_adapters_does_not_break_others(
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files
client: openai.AsyncOpenAI, tmp_path, qwen3_lora_files
):
invalid_files = tmp_path / "invalid_files"
invalid_files.mkdir()
@@ -215,7 +214,7 @@ async def test_loading_invalid_adapters_does_not_break_others(
while not stop_good_requests_event.is_set():
try:
batch = await client.completions.create(
model="zephyr-lora",
model="qwen3-lora",
prompt=["Hello there", "Foo bar bazz buzz"],
max_tokens=5,
)
@@ -254,7 +253,7 @@ async def test_loading_invalid_adapters_does_not_break_others(
await client.post(
"load_lora_adapter",
cast_to=str,
body={"lora_name": "valid", "lora_path": zephyr_lora_files},
body={"lora_name": "valid", "lora_path": qwen3_lora_files},
)
await client.completions.create(
model="valid",
@@ -267,7 +266,7 @@ async def test_loading_invalid_adapters_does_not_break_others(
async def test_beam_search_with_lora_adapters(
client: openai.AsyncOpenAI,
tmp_path,
zephyr_lora_files,
qwen3_lora_files,
):
"""Validate that async beam search can be used with lora."""
@@ -275,7 +274,7 @@ async def test_beam_search_with_lora_adapters(
await client.post(
"load_lora_adapter",
cast_to=str,
body={"lora_name": adapter_name, "lora_path": str(zephyr_lora_files)},
body={"lora_name": adapter_name, "lora_path": str(qwen3_lora_files)},
)
for _ in range(3):
await client.completions.create(