Rename servers to engines (#152)

2023-06-17 17:25:21 +08:00
parent bab8f3dd0d
commit e5464ee484
15 changed files with 165 additions and 174 deletions
--- a/examples/llm_engine_example.py
+++ b/examples/llm_engine_example.py
@@ -0,0 +1,45 @@
+import argparse
+
+from cacheflow import EngineArgs, LLMEngine, SamplingParams
+
+
+def main(args: argparse.Namespace):
+    # Parse the CLI argument and initialize the engine.
+    engine_args = EngineArgs.from_cli_args(args)
+    engine = LLMEngine.from_engine_args(engine_args)
+
+    # Test the following prompts.
+    test_prompts = [
+        ("A robot may not injure a human being", SamplingParams()),
+        ("To be or not to be,",
+         SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
+        ("What is the meaning of life?",
+         SamplingParams(n=2, best_of=5, temperature=0.8, top_p=0.95, frequency_penalty=0.1)),
+        ("It is only with the heart that one can see rightly",
+         SamplingParams(n=3, best_of=3, use_beam_search=True, temperature=0.0)),
+    ]
+
+    # Run the engine by calling `engine.step()` manually.
+    request_id = 0
+    while True:
+        # To test iteration-level scheduling, we add one request at each step.
+        if test_prompts:
+            prompt, sampling_params = test_prompts.pop(0)
+            engine.add_request(str(request_id), prompt, sampling_params)
+            request_id += 1
+
+        request_outputs = engine.step()
+        for request_output in request_outputs:
+            if request_output.finished():
+                print(request_output)
+
+        if not (engine.has_unfinished_requests() or test_prompts):
+            break
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Demo on using the LLMEngine class directly')
+    parser = EngineArgs.add_cli_args(parser)
+    args = parser.parse_args()
+    main(args)