[Core] Consolidate prompt arguments to LLM engines (#4328)
Co-authored-by: Roger Wang <ywang@roblox.com>
@@ -70,8 +70,15 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
             for prompt in prompts:
                 hashes[-1].append([])
                 prompt_token_ids = tokenizer.encode(prompt)
-                seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
-                               tokenizer.tokenizer.eos_token_id, lora_request)
+                seq = Sequence(seq_id,
+                               inputs={
+                                   "prompt": prompt,
+                                   "prompt_token_ids": prompt_token_ids,
+                                   "multi_modal_data": None,
+                               },
+                               block_size=block_size,
+                               eos_token_id=tokenizer.tokenizer.eos_token_id,
+                               lora_request=lora_request)
 
                 num_blocks = len(prompt_token_ids) // block_size
                 for idx in range(num_blocks):
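For readers tracking the API change, here is a minimal sketch of the consolidated calling convention. It assumes vLLM at this commit (Sequence imported from vllm.sequence) and uses an illustrative Hugging Face tokenizer; the model name, seq_id, and block_size below are placeholders, not values taken from the diff.

    # Sketch of constructing a Sequence after this change (assumptions above).
    from transformers import AutoTokenizer

    from vllm.sequence import Sequence

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")  # illustrative

    prompt = "Hello, world!"
    prompt_token_ids = tokenizer.encode(prompt)

    # Previously, prompt fields were separate positional arguments:
    #   seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
    #                  eos_token_id, lora_request)
    # After this change they travel together in one `inputs` dict, so adding a
    # new field such as multi_modal_data does not grow the signature.
    seq = Sequence(seq_id=0,
                   inputs={
                       "prompt": prompt,
                       "prompt_token_ids": prompt_token_ids,
                       "multi_modal_data": None,
                   },
                   block_size=16,
                   eos_token_id=tokenizer.eos_token_id,
                   lora_request=None)

    print(seq.get_len())  # number of tokens currently held by the sequence

Passing the remaining arguments by keyword mirrors the updated test above and keeps call sites readable as the inputs schema grows.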