diff --git a/docs/features/lora.md b/docs/features/lora.md
index dda6b4768..1a30ad7b0 100644
--- a/docs/features/lora.md
+++ b/docs/features/lora.md
@@ -210,6 +210,24 @@ Alternatively, follow these example steps to implement your own plugin:
 
     For more details, refer to the [vLLM's Plugins System](../design/plugin_system.md).
 
+### In-Place LoRA Reloading
+
+When dynamically loading LoRA adapters, you may need to replace an existing adapter with updated weights while keeping the same name. The `load_inplace` parameter enables this functionality. This need commonly arises in asynchronous reinforcement learning setups, where adapters are continuously updated and swapped in without interrupting ongoing inference.
+
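+When `load_inplace=True`, vLLM will replace the existing adapter with the new one. When it is omitted or `false` (the default), loading an adapter whose name is already in use returns an error.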
+
+Example request to load or replace a LoRA adapter with the same name:
+
+```bash
+curl -X POST http://localhost:8000/v1/load_lora_adapter \
+-H "Content-Type: application/json" \
+-d '{
+    "lora_name": "my-adapter",
+    "lora_path": "/path/to/adapter/v2",
+    "load_inplace": true
+}'
+```
+
 ## New format for `--lora-modules`
 
 In the previous version, users would provide LoRA modules via the following format, either as a key-value pair or in JSON format. For example:
diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py
index 9ab50c44a..c2e9a1de3 100644
--- a/tests/entrypoints/conftest.py
+++ b/tests/entrypoints/conftest.py
@@ -195,6 +195,22 @@ def qwen3_lora_files():
     return snapshot_download(repo_id="charent/self_cognition_Alice")
 
 
+@pytest.fixture(scope="session")
+def qwen3_meowing_lora_files():
+    """Download Qwen3 Meow LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+
+    return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Meow-LoRA")
+
+
+@pytest.fixture(scope="session")
+def qwen3_woofing_lora_files():
+    """Download Qwen3 Woof LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+
+    return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Woof-LoRA")
+
+
 @pytest.fixture(scope="session")
 def opt125_lora_files() -> str:
     """Download opt-125m LoRA files once per test session."""
diff --git a/tests/entrypoints/openai/test_lora_adapters.py b/tests/entrypoints/openai/test_lora_adapters.py
index 22461f470..aa664f6d7 100644
--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -104,6 +104,82 @@ async def test_dynamic_lora_lineage(client: openai.AsyncOpenAI, qwen3_lora_files
     assert dynamic_lora_model.id == "qwen3-lora-3"
 
 
+@pytest.mark.asyncio
+async def test_load_lora_adapter_with_same_name_replaces_inplace(
+    client: openai.AsyncOpenAI, qwen3_meowing_lora_files, qwen3_woofing_lora_files
+):
+    """Test that load_inplace=True replaces a same-named LoRA adapter in place."""
+    adapter_name = "replaceable-adapter"
+    messages = [
+        {"content": "Follow the instructions to make animal noises", "role": "system"},
+        {"content": "Make your favorite animal noise.", "role": "user"},
+    ]
+
+    # Load LoRA that makes model meow
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files},
+    )
+    assert "success" in response.lower()
+
+    completion = await client.chat.completions.create(
+        model=adapter_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    assert "Meow Meow Meow" in completion.choices[0].message.content
+
+    # Load LoRA that makes model woof
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={
+            "lora_name": adapter_name,
+            "lora_path": qwen3_woofing_lora_files,
+            "load_inplace": True,
+        },
+    )
+    assert "success" in response.lower()
+
+    completion = await client.chat.completions.create(
+        model=adapter_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    assert "Woof Woof Woof" in completion.choices[0].message.content
+
+
+@pytest.mark.asyncio
+async def test_load_lora_adapter_with_load_inplace_false_errors(
+    client: openai.AsyncOpenAI, qwen3_meowing_lora_files
+):
+    """Test that load_inplace=False returns an error when adapter already exists."""
+    adapter_name = "test-load-inplace-false"
+
+    # Load LoRA adapter first time (should succeed)
+    response = await client.post(
+        "load_lora_adapter",
+        cast_to=str,
+        body={"lora_name": adapter_name, "lora_path": qwen3_meowing_lora_files},
+    )
+    assert "success" in response.lower()
+
+    # Try to load the same adapter again with load_inplace=False (should fail)
+    with pytest.raises(openai.BadRequestError) as exc_info:
+        await client.post(
+            "load_lora_adapter",
+            cast_to=str,
+            body={
+                "lora_name": adapter_name,
+                "lora_path": qwen3_meowing_lora_files,
+            },
+        )
+
+    # Verify the error message
+    assert "already been loaded" in str(exc_info.value)
+
+
 @pytest.mark.asyncio
 async def test_dynamic_lora_not_found(client: openai.AsyncOpenAI):
     with pytest.raises(openai.NotFoundError):
diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index 928d64827..deb1ab92d 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -233,6 +233,18 @@ def qwen3vl_vision_lora_files():
     return snapshot_download(repo_id="EpochEcho/qwen3-4b-vl-lora-vision-connector")
 
 
+@pytest.fixture(scope="session")
+def qwen3_meowing_lora_files():
+    """Download Qwen3 Meow LoRA files once per test session."""
+    return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Meow-LoRA")
+
+
+@pytest.fixture(scope="session")
+def qwen3_woofing_lora_files():
+    """Download Qwen3 Woof LoRA files once per test session."""
+    return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Woof-LoRA")
+
+
 @pytest.fixture(scope="session")
 def tinyllama_lora_files():
     return snapshot_download(repo_id="jashing/tinyllama-colorist-lora")
diff --git a/tests/lora/test_llm_with_multi_loras.py b/tests/lora/test_llm_with_multi_loras.py
index 269a1ade7..56bac026b 100644
--- a/tests/lora/test_llm_with_multi_loras.py
+++ b/tests/lora/test_llm_with_multi_loras.py
@@ -30,9 +30,11 @@ LORA_TEST_EXPECTED = [
 ]
 
 
-def format_chatml_messages(prompt: str):
+def format_chatml_messages(
+    prompt: str, system_prompt: str = "You are a helpful assistant."
+) -> list[dict[str, str]]:
     return [
-        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "system", "content": system_prompt},
         {"role": "user", "content": prompt},
     ]
 
@@ -185,3 +187,110 @@ def test_multiple_lora_requests():
     single_lora_request = lora_request[0]
     outputs = llm.generate(PROMPTS, lora_request=single_lora_request)
     assert len(PROMPTS) == len(outputs)
+
+
+def test_load_inplace_offline_reload(
+    qwen3_meowing_lora_files: str, qwen3_woofing_lora_files: str
+) -> None:
+    """
+    Test that load_inplace=True allows reloading LoRA adapters with the same ID
+    in offline mode (using LLM class directly).
+    """
+    llm = LLM(
+        model=MODEL_PATH,
+        enable_lora=True,
+        max_loras=2,
+        max_lora_rank=LORA_RANK,
+        max_model_len=512,
+        gpu_memory_utilization=0.5,
+        enforce_eager=True,
+    )
+    adapter_id = 1
+    messages = format_chatml_messages(
+        "Make your favorite animal noise.",
+        system_prompt="Follow the instructions to make animal noises",
+    )
+    sampling_params = SamplingParams(temperature=0, max_tokens=10)
+
+    # Load meowing LoRA for the first time (load_inplace defaults to False)
+    meowing_request = LoRARequest(
+        lora_name="test-adapter",
+        lora_int_id=adapter_id,
+        lora_path=qwen3_meowing_lora_files,
+    )
+
+    outputs = llm.chat([messages], sampling_params, lora_request=meowing_request)
+    first_output = outputs[0].outputs[0].text.strip()
+    assert "Meow Meow Meow" in first_output, (
+        f"Expected meowing output, got: {first_output}"
+    )
+
+    # Reload with woofing LoRA (same ID, different weights, load_inplace=True)
+    woofing_request = LoRARequest(
+        lora_name="test-adapter-woof",
+        lora_int_id=adapter_id,  # Same ID
+        lora_path=qwen3_woofing_lora_files,  # Different weights
+        load_inplace=True,  # Force reload
+    )
+
+    outputs = llm.chat([messages], sampling_params, lora_request=woofing_request)
+    second_output = outputs[0].outputs[0].text.strip()
+    assert "Woof Woof Woof" in second_output, (
+        f"Expected woofing output, got: {second_output}"
+    )
+
+
+def test_load_inplace_false_no_reload(
+    qwen3_meowing_lora_files: str, qwen3_woofing_lora_files: str
+) -> None:
+    """
+    Test that load_inplace=False prevents reloading when an adapter
+    with the same ID already exists.
+    """
+    llm = LLM(
+        model=MODEL_PATH,
+        enable_lora=True,
+        max_loras=2,
+        max_lora_rank=LORA_RANK,
+        max_model_len=512,
+        gpu_memory_utilization=0.5,
+        enforce_eager=True,
+    )
+    adapter_id = 2
+    messages = format_chatml_messages(
+        "Make your favorite animal noise.",
+        system_prompt="Follow the instructions to make animal noises",
+    )
+    sampling_params = SamplingParams(temperature=0, max_tokens=10)
+
+    # Load meowing LoRA first (load_inplace defaults to False)
+    meowing_request_initial = LoRARequest(
+        lora_name="test-adapter-2",
+        lora_int_id=adapter_id,
+        lora_path=qwen3_meowing_lora_files,
+    )
+
+    outputs = llm.chat(
+        [messages], sampling_params, lora_request=meowing_request_initial
+    )
+    first_output = outputs[0].outputs[0].text.strip()
+    assert "Meow Meow Meow" in first_output, (
+        f"Expected meowing output, got: {first_output}"
+    )
+
+    # Try to load woofing LoRA with same ID but load_inplace=False
+    # This should NOT reload (adapter 2 already exists)
+    woofing_request_no_reload = LoRARequest(
+        lora_name="test-adapter-2-woof",
+        lora_int_id=adapter_id,  # Same ID
+        lora_path=qwen3_woofing_lora_files,
+    )
+
+    outputs = llm.chat(
+        [messages], sampling_params, lora_request=woofing_request_no_reload
+    )
+    second_output = outputs[0].outputs[0].text.strip()
+    # Should still get meowing output because it didn't reload
+    assert "Meow Meow Meow" in second_output, (
+        f"Expected meowing output (no reload), got: {second_output}"
+    )
diff --git a/vllm/entrypoints/openai/models/serving.py b/vllm/entrypoints/openai/models/serving.py
index 2d8cf8f33..a4b92e5ec 100644
--- a/vllm/entrypoints/openai/models/serving.py
+++ b/vllm/entrypoints/openai/models/serving.py
@@ -132,9 +132,16 @@ class OpenAIServingModels:
                 return error_check_ret
 
             lora_path = request.lora_path
-            unique_id = self.lora_id_counter.inc(1)
+            lora_int_id = (
+                self.lora_requests[lora_name].lora_int_id
+                if lora_name in self.lora_requests
+                else self.lora_id_counter.inc(1)
+            )
             lora_request = LoRARequest(
-                lora_name=lora_name, lora_int_id=unique_id, lora_path=lora_path
+                lora_name=lora_name,
+                lora_int_id=lora_int_id,
+                lora_path=lora_path,
+                load_inplace=request.load_inplace,
             )
             if base_model_name is not None and self.is_base_model(base_model_name):
                 lora_request.base_model_name = base_model_name
@@ -187,11 +194,13 @@ class OpenAIServingModels:
                 status_code=HTTPStatus.BAD_REQUEST,
             )
 
+        # If not loading inplace
         # Check if the lora adapter with the given name already exists
-        if request.lora_name in self.lora_requests:
+        if not request.load_inplace and request.lora_name in self.lora_requests:
             return create_error_response(
                 message=f"The lora adapter '{request.lora_name}' has already been "
-                "loaded.",
+                "loaded. If you want to load the adapter in place, set 'load_inplace'"
+                " to True.",
                 err_type="InvalidUserInput",
                 status_code=HTTPStatus.BAD_REQUEST,
             )
diff --git a/vllm/entrypoints/serve/lora/api_router.py b/vllm/entrypoints/serve/lora/api_router.py
index 51bfc755f..057bf5c2e 100644
--- a/vllm/entrypoints/serve/lora/api_router.py
+++ b/vllm/entrypoints/serve/lora/api_router.py
@@ -36,6 +36,7 @@ def attach_router(app: FastAPI):
         request_shape={
             "lora_name": "body.name",
             "lora_path": "body.src",
+            "load_inplace": "body.load_inplace || `false`",
         },
     )
     @router.post("/v1/load_lora_adapter", dependencies=[Depends(validate_json_request)])
diff --git a/vllm/entrypoints/serve/lora/protocol.py b/vllm/entrypoints/serve/lora/protocol.py
index e39f35f38..3e3a30cf3 100644
--- a/vllm/entrypoints/serve/lora/protocol.py
+++ b/vllm/entrypoints/serve/lora/protocol.py
@@ -7,6 +7,7 @@ from pydantic import BaseModel, Field
 class LoadLoRAAdapterRequest(BaseModel):
     lora_name: str
     lora_path: str
+    load_inplace: bool = False
 
 
 class UnloadLoRAAdapterRequest(BaseModel):
diff --git a/vllm/lora/request.py b/vllm/lora/request.py
index 2811fee1d..008ade5e5 100644
--- a/vllm/lora/request.py
+++ b/vllm/lora/request.py
@@ -15,6 +15,11 @@ class LoRARequest(
 
     lora_int_id must be globally unique for a given adapter.
     This is currently not enforced in vLLM.
+
+    load_inplace: If True, forces reloading the adapter even if one
+        with the same lora_int_id already exists in the cache. This replaces
+        the existing adapter in-place. If False (default), only loads if the
+        adapter is not already loaded.
     """
 
     lora_name: str
@@ -22,6 +27,7 @@ class LoRARequest(
     lora_path: str = ""
     base_model_name: str | None = msgspec.field(default=None)
     tensorizer_config_dict: dict | None = None
+    load_inplace: bool = False
 
     def __post_init__(self):
         if self.lora_int_id < 1:
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index 277e462a3..598c10407 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -254,13 +254,20 @@ class LRUCacheWorkerLoRAManager(WorkerLoRAManager):
         # This is ok because it's currently only called from
         # the single-threaded core engine loop.
 
-        if lora_request.lora_int_id not in self.list_adapters():
+        if (
+            lora_request.lora_int_id not in self.list_adapters()
+            or lora_request.load_inplace
+        ):
             # Load the new adapter first to ensure it is actually valid, before
             # evicting any existing adapters.
             # This may cause the # of loaded lora adapters to very temporarily
             # exceed `--max-cpu-loras`.
             lora = self._load_adapter(lora_request)
 
+            # Remove the existing adapter with this ID, if any, so that an
+            # in-place reload (load_inplace=True) replaces it.
+            self._adapter_manager.remove_adapter(lora.id)
+
             # Loading succeeded, now check if we will exceed cache capacity and
             # evict if the oldest adapter if so
             if len(self._adapter_manager) + 1 > self._adapter_manager.capacity: