[V1] LoRA - Enable Serving Usecase (#12883)
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com> Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
committed by
GitHub
parent
f0b2da72a8
commit
cbc40128eb
@@ -361,6 +361,10 @@ class AsyncLLM(EngineClient):
|
||||
async def reset_prefix_cache(self) -> None:
|
||||
await self.engine_core.reset_prefix_cache_async()
|
||||
|
||||
async def add_lora(self, lora_request: LoRARequest) -> None:
|
||||
"""Load a new LoRA adapter into the engine for future requests."""
|
||||
await self.engine_core.add_lora_async(lora_request)
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return True
|
||||
@@ -376,7 +380,3 @@ class AsyncLLM(EngineClient):
|
||||
@property
|
||||
def dead_error(self) -> BaseException:
|
||||
return Exception() # TODO: implement
|
||||
|
||||
async def add_lora(self, lora_request: LoRARequest) -> None:
|
||||
"""Load a new LoRA adapter into the engine for future requests."""
|
||||
raise NotImplementedError("LoRA not yet supported in V1")
|
||||
|
||||
Reference in New Issue
Block a user