[V1] LoRA - Enable Serving Usecase (#12883)

Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-02-14 11:51:12 +05:30
committed by GitHub
parent f0b2da72a8
commit cbc40128eb
7 changed files with 210 additions and 7 deletions

View File

@@ -15,6 +15,7 @@ from vllm.distributed import (ensure_model_parallel_initialized,
init_distributed_environment,
set_custom_all_reduce)
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform
from vllm.utils import GiB_bytes
@@ -234,6 +235,9 @@ class Worker(WorkerBase):
else:
self.profiler.stop()
def add_lora(self, lora_request: LoRARequest) -> bool:
    """Register a LoRA adapter on this worker.

    Delegates directly to the model runner, which owns the LoRA
    manager; returns its success flag.
    """
    runner = self.model_runner
    return runner.add_lora(lora_request)
def check_health(self) -> None:
    """Report worker health.

    A running worker is by definition healthy, so this is a no-op:
    it returns normally without raising.
    """
    return None

View File

@@ -127,3 +127,8 @@ class LoRAModelRunnerMixin:
# __exit__ code
self.lora_manager.remove_all_adapters()
def add_lora(self, lora_request: LoRARequest) -> bool:
    """Add a LoRA adapter via the mixin's LoRA manager.

    Raises:
        RuntimeError: if LoRA support was never enabled (no manager).

    Returns:
        Whether the adapter was added, as reported by the manager.
    """
    manager = self.lora_manager
    if not manager:
        raise RuntimeError("LoRA is not enabled.")
    return manager.add_adapter(lora_request)