[Feat] allow inplace loading lora (#31326)

Signed-off-by: Jackmin801 <ongjackm@gmail.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
2026-01-19 18:15:20 -08:00
parent 05dc4bfab6
commit 12dab78f49
10 changed files with 262 additions and 7 deletions
--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -104,6 +104,82 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files
    assert dynamic_lora_model.id == "qwen3-lora-3"


+@pytest.mark.asyncio
+async def test_load_lora_adapter_with_same_name_replaces_inplace(
+    client: openai.AsyncOpenAI, qwen3_meowing_lora_files, qwen3_woofing_lora_files
+):
+    """Test that load_inplace=True replaces a same-named adapter inplace."""
+    adapter_name = "replaceable-adapter"
+    messages = [
+        {"content": "Follow the instructions to make animal noises", "role": "system"},
+        {"content": "Make your favorite animal noise.", "role": "user"},
+    ]
+
+    # Load the LoRA that makes the model meow under `adapter_name`
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files},
+    )
+    assert "success" in response.lower()
+
+    completion = await client.chat.completions.create(
+        model=adapter_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    assert "Meow Meow Meow" in completion.choices[0].message.content
+
+    # Reuse the same name with load_inplace=True, now pointing at the woofing LoRA
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={
+            "lora_name": adapter_name,
+            "lora_path": qwen3_woofing_lora_files,
+            "load_inplace": True,
+        },
+    )
+    assert "success" in response.lower()
+
+    completion = await client.chat.completions.create(
+        model=adapter_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    assert "Woof Woof Woof" in completion.choices[0].message.content
+
+
+@pytest.mark.asyncio
+async def test_load_lora_adapter_with_load_inplace_false_errors(
+    client: openai.AsyncOpenAI, qwen3_meowing_lora_files
+):
+    """Test that reloading an existing adapter name without load_inplace errors."""
+    adapter_name = "test-load-inplace-false"
+
+    # Load LoRA adapter first time (should succeed)
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files},
+    )
+    assert "success" in response.lower()
+
+    # Load it again, omitting load_inplace (presumably defaults to False); must fail
+    with pytest.raises(openai.BadRequestError) as exc_info:
+        await client.post(
+            "load_lora_adapter",
+            cast_to=str,
+            body={
+                "lora_name": adapter_name,
+                "lora_path": qwen3_meowing_lora_files,
+            },
+        )
+
+    # The error message should report that the adapter is already loaded
+    assert "already been loaded" in str(exc_info.value)
+
+
@pytest.mark.asyncio
 async def test_dynamic_lora_not_found(client: openai.AsyncOpenAI):
    with pytest.raises(openai.NotFoundError):