[Feat] allow inplace loading lora (#31326)

Signed-off-by: Jackmin801 <ongjackm@gmail.com>
Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jackmin801
2026-01-19 18:15:20 -08:00
committed by GitHub
parent 05dc4bfab6
commit 12dab78f49
10 changed files with 262 additions and 7 deletions

View File

@@ -104,6 +104,82 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files
assert dynamic_lora_model.id == "qwen3-lora-3"
@pytest.mark.asyncio
async def test_load_lora_adapter_with_same_name_replaces_inplace(
    client: openai.AsyncOpenAI, qwen3_meowing_lora_files, qwen3_woofing_lora_files
):
    """Test that loading a LoRA adapter with the same name replaces it inplace.

    The adapter name is first loaded from the "meowing" LoRA path and then
    loaded again from the "woofing" LoRA path with ``load_inplace=True``.
    After each load, a chat completion is requested under that adapter name
    and its text is checked for the matching animal noise.
    """
    adapter_name = "replaceable-adapter"
    messages = [
        {"content": "Follow the instructions to make animal noises", "role": "system"},
        {"content": "Make your favorite animal noise.", "role": "user"},
    ]

    # Load LoRA that makes model meow
    response = await client.post(
        "load_lora_adapter",
        cast_to=str,
        body={"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files},
    )
    assert "success" in response.lower()

    completion = await client.chat.completions.create(
        model=adapter_name,
        messages=messages,
        max_tokens=10,
        # Greedy decoding: the assertion below matches exact text, so the
        # outcome must not depend on random sampling.
        temperature=0.0,
    )
    assert "Meow Meow Meow" in completion.choices[0].message.content

    # Load LoRA that makes model woof, reusing the same adapter name
    response = await client.post(
        "load_lora_adapter",
        cast_to=str,
        body={
            "lora_name": adapter_name,
            "lora_path": qwen3_woofing_lora_files,
            "load_inplace": True,
        },
    )
    assert "success" in response.lower()

    completion = await client.chat.completions.create(
        model=adapter_name,
        messages=messages,
        max_tokens=10,
        # Same greedy setting as above so both halves are compared alike.
        temperature=0.0,
    )
    assert "Woof Woof Woof" in completion.choices[0].message.content
@pytest.mark.asyncio
async def test_load_lora_adapter_with_load_inplace_false_errors(
    client: openai.AsyncOpenAI, qwen3_meowing_lora_files
):
    """Test that load_inplace=False returns an error when adapter already exists.

    Covers both ways of requesting a non-inplace load: leaving ``load_inplace``
    out of the request body and sending it explicitly as ``False``.
    """
    adapter_name = "test-load-inplace-false"
    load_body = {"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files}

    # Load LoRA adapter first time (should succeed)
    response = await client.post("load_lora_adapter", cast_to=str, body=load_body)
    assert "success" in response.lower()

    # Try to load the same adapter again without replacing it (should fail).
    # NOTE(review): the omitted-field case assumes the server-side default for
    # load_inplace is False - confirm against the request schema.
    for extra_fields in ({}, {"load_inplace": False}):
        with pytest.raises(openai.BadRequestError) as exc_info:
            await client.post(
                "load_lora_adapter",
                cast_to=str,
                body={**load_body, **extra_fields},
            )

        # Verify the error message
        assert "already been loaded" in str(exc_info.value)
@pytest.mark.asyncio
async def test_dynamic_lora_not_found(client: openai.AsyncOpenAI):
with pytest.raises(openai.NotFoundError):