[LoRA] Add support for pinning LoRA adapters in the LRU cache (#5603)

This commit is contained in:
rohithkrn
2024-06-21 15:42:46 -07:00
committed by GitHub
parent 7187507301
commit f5dda63eb5
13 changed files with 171 additions and 5 deletions

View File

@@ -1009,6 +1009,9 @@ class LLMEngine:
def list_loras(self) -> Set[int]:
    """Return the set of IDs for the currently loaded LoRA adapters.

    Pure delegation: the model executor owns the adapter registry.
    """
    executor = self.model_executor
    return executor.list_loras()
def pin_lora(self, lora_id: int) -> bool:
    """Pin the LoRA adapter identified by ``lora_id``.

    Forwards to the model executor and returns its success flag.
    """
    pinned = self.model_executor.pin_lora(lora_id)
    return pinned
def check_health(self) -> None:
    """Delegate the health check to the model executor.

    Returns nothing; any problem surfaces from the executor itself.
    """
    executor = self.model_executor
    executor.check_health()