[V1] LoRA - Enable Serving Usecase (#12883)

Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-02-14 11:51:12 +05:30
committed by GitHub
parent f0b2da72a8
commit cbc40128eb
7 changed files with 210 additions and 7 deletions

View File

@@ -361,6 +361,10 @@ class AsyncLLM(EngineClient):
async def reset_prefix_cache(self) -> None:
    """Ask the engine core to reset its prefix cache and wait for it to finish."""
    core = self.engine_core
    await core.reset_prefix_cache_async()
async def add_lora(self, lora_request: LoRARequest) -> None:
    """Forward *lora_request* to the engine core's ``add_lora_async``.

    The call is awaited before returning; whatever ``add_lora_async``
    returns is discarded, so this coroutine always yields ``None``.
    """
    core = self.engine_core
    await core.add_lora_async(lora_request)
@property
def is_running(self) -> bool:
    """Report the running state; this implementation always answers True."""
    running = True
    return running
@@ -376,7 +380,3 @@ class AsyncLLM(EngineClient):
@property
def dead_error(self) -> BaseException:
    """Return the error associated with a dead engine.

    Currently a placeholder: a fresh, argument-less ``Exception`` is built
    on every access.
    """
    # TODO: implement
    placeholder = Exception()
    return placeholder
async def add_lora(self, lora_request: LoRARequest) -> None:
    """Reject the request: this variant does not load LoRA adapters.

    Raises:
        NotImplementedError: always, as soon as the coroutine is awaited.
    """
    message = "LoRA not yet supported in V1"
    raise NotImplementedError(message)