[Feat] allow inplace loading lora (#31326)
Signed-off-by: Jackmin801 <ongjackm@gmail.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -104,6 +104,82 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files
|
||||
assert dynamic_lora_model.id == "qwen3-lora-3"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_lora_adapter_with_same_name_replaces_inplace(
    client: openai.AsyncOpenAI, qwen3_meowing_lora_files, qwen3_woofing_lora_files
):
    """Check that an already-loaded adapter name can be replaced in place.

    The adapter is first loaded from ``qwen3_meowing_lora_files`` and a chat
    completion must contain "Meow Meow Meow". The same name is then loaded
    again from ``qwen3_woofing_lora_files`` with ``load_inplace`` set to
    ``True``, after which the same prompt must yield "Woof Woof Woof".
    """
    adapter_name = "replaceable-adapter"
    system_turn = {
        "content": "Follow the instructions to make animal noises",
        "role": "system",
    }
    user_turn = {"content": "Make your favorite animal noise.", "role": "user"}
    messages = [system_turn, user_turn]

    # Step 1: register the adapter under `adapter_name` using the meowing weights.
    meow_request = {
        "lora_name": adapter_name,
        "lora_path": qwen3_meowing_lora_files,
    }
    load_result = await client.post(
        "load_lora_adapter", cast_to=str, body=meow_request
    )
    assert "success" in load_result.lower()

    meow_reply = await client.chat.completions.create(
        model=adapter_name, messages=messages, max_tokens=10
    )
    assert "Meow Meow Meow" in meow_reply.choices[0].message.content

    # Step 2: reuse the same name, asking the server to swap the weights in place.
    woof_request = {
        "lora_name": adapter_name,
        "lora_path": qwen3_woofing_lora_files,
        "load_inplace": True,
    }
    load_result = await client.post(
        "load_lora_adapter", cast_to=str, body=woof_request
    )
    assert "success" in load_result.lower()

    # The identical prompt must now reflect the replacement adapter.
    woof_reply = await client.chat.completions.create(
        model=adapter_name, messages=messages, max_tokens=10
    )
    assert "Woof Woof Woof" in woof_reply.choices[0].message.content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_lora_adapter_with_load_inplace_false_errors(
    client: openai.AsyncOpenAI, qwen3_meowing_lora_files
):
    """Check that re-loading an existing adapter name is rejected.

    The adapter is loaded once successfully. A second load request with the
    same name and path, sent without a ``load_inplace`` field, must raise
    ``openai.BadRequestError`` whose text contains "already been loaded".
    """
    adapter_name = "test-load-inplace-false"

    # Initial load: the name is new, so the server should accept it.
    initial_request = {
        "lora_name": adapter_name,
        "lora_path": qwen3_meowing_lora_files,
    }
    first_result = await client.post(
        "load_lora_adapter", cast_to=str, body=initial_request
    )
    assert "success" in first_result.lower()

    # Duplicate load: `load_inplace` is deliberately left out of the body, so
    # this relies on the server-side default (presumably False -- confirm
    # against the request schema).
    duplicate_request = {
        "lora_name": adapter_name,
        "lora_path": qwen3_meowing_lora_files,
    }
    with pytest.raises(openai.BadRequestError) as raised:
        await client.post(
            "load_lora_adapter", cast_to=str, body=duplicate_request
        )

    # The rejection must explain that the adapter name is already taken.
    assert "already been loaded" in str(raised.value)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dynamic_lora_not_found(client: openai.AsyncOpenAI):
|
||||
with pytest.raises(openai.NotFoundError):
|
||||
|
||||
Reference in New Issue
Block a user