[Core] Consolidate prompt arguments to LLM engines (#4328)

Author: Cyrus Leung
Co-authored-by: Roger Wang <ywang@roblox.com>
Date: 2024-05-29 04:29:31 +08:00
Parent: 290f4ada2b
Commit: 5ae5ed1e60
43 changed files with 1407 additions and 442 deletions

@@ -70,8 +70,15 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
     for prompt in prompts:
         hashes[-1].append([])
         prompt_token_ids = tokenizer.encode(prompt)
-        seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
-                       tokenizer.tokenizer.eos_token_id, lora_request)
+        seq = Sequence(seq_id,
+                       inputs={
+                           "prompt": prompt,
+                           "prompt_token_ids": prompt_token_ids,
+                           "multi_modal_data": None,
+                       },
+                       block_size=block_size,
+                       eos_token_id=tokenizer.tokenizer.eos_token_id,
+                       lora_request=lora_request)
         num_blocks = len(prompt_token_ids) // block_size
         for idx in range(num_blocks):
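
The hunk above is representative of the whole commit: the separate positional
prompt and prompt_token_ids arguments (plus the new multi_modal_data field)
are consolidated into a single `inputs` dict passed to Sequence and the LLM
engines. Below is a minimal sketch of that pattern, assuming a PromptInputs
TypedDict and a make_inputs helper for illustration; the actual type and
helper names in vLLM may differ.

    # Sketch of the consolidated-inputs pattern introduced by this commit.
    # `PromptInputs` and `make_inputs` are illustrative names, not the
    # actual vLLM API.
    from typing import Any, List, Optional, TypedDict


    class PromptInputs(TypedDict):
        # Original prompt text, if the caller has it.
        prompt: Optional[str]
        # Token ids for the prompt, as produced by the tokenizer.
        prompt_token_ids: List[int]
        # Per-request multi-modal payload; None for text-only requests.
        multi_modal_data: Optional[Any]


    def make_inputs(prompt: str, tokenizer) -> PromptInputs:
        # Bundle the previously separate positional arguments into one
        # dict, so each call site passes a single `inputs` argument.
        return PromptInputs(
            prompt=prompt,
            prompt_token_ids=tokenizer.encode(prompt),
            multi_modal_data=None,
        )

With this shape, call sites like the test above can pass
inputs=make_inputs(prompt, tokenizer) instead of threading each field through
as a separate positional parameter, which keeps the Sequence signature stable
as new input kinds (such as multi-modal data) are added.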