[Experimental] Add multi-LoRA support (#1804)

Co-authored-by: Chen Shen <scv119@gmail.com>
Co-authored-by: Shreyas Krishnaswamy <shrekris@anyscale.com>
Co-authored-by: Avnish Narayan <avnish@anyscale.com>
2024-01-24 00:26:37 +01:00
parent 9c1352eb57
commit 9b945daaf1
52 changed files with 8035 additions and 126 deletions
--- a/tests/async_engine/test_async_llm_engine.py
+++ b/tests/async_engine/test_async_llm_engine.py
@@ -25,6 +25,13 @@ class MockEngine:
        return [RequestOutput(
            request_id=self.request_id)] if self.request_id else []

+    async def encode_request_async(
+        self,
+        *args,
+        **kwargs,
+    ):
+        return [1]  # Stub: ignores all arguments, returns a fixed list.
+
    def generate(self, request_id):
        self.request_id = request_id

@@ -35,6 +42,10 @@ class MockEngine:
        del kwargs  # Unused
        self.add_request_calls += 1

+    async def add_request_async(self, **kwargs):
+        del kwargs  # Unused
+        self.add_request_calls += 1  # Same counter the sync path bumps.
+
    def abort_request(self, request_id):
        del request_id  # Unused
        self.abort_request_calls += 1