[Bugfix] Validate lora adapters to avoid crashing server (#11727)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Joe Runde
2025-01-10 00:56:36 -07:00
committed by GitHub
parent cf5f000d21
commit ac2f3f7fee
15 changed files with 460 additions and 172 deletions

View File

@@ -1257,6 +1257,10 @@ class AsyncLLMEngine(EngineClient):
else:
self.engine.model_executor._run_workers("stop_profile")
async def add_lora(self, lora_request: LoRARequest) -> None:
    """Register a LoRA adapter with the wrapped engine for future requests.

    The request is handed unchanged to ``self.engine.add_lora``. Whatever
    that call returns is discarded, and any exception it raises propagates
    to the caller.

    Args:
        lora_request: The adapter request forwarded to the engine.
    """
    engine = self.engine
    engine.add_lora(lora_request)
# TODO(v1): Remove this class proxy when V1 goes default.
if envs.VLLM_USE_V1: