[1/N] Initial prototype for multi-modal processor (#10044)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-11-13 20:39:03 +08:00
committed by GitHub
parent bb7991aa29
commit 0b8bb86bf1
48 changed files with 1132 additions and 436 deletions

View File

@@ -1,5 +1,5 @@
"""Compare the with and without prefix caching."""
from vllm.inputs import DecoderOnlyInputs
from vllm.inputs import token_inputs
from vllm.sampling_params import SamplingParams
from vllm.v1.core.kv_cache_manager import KVCacheManager, Request
from vllm.v1.core.kv_cache_utils import hash_block_tokens
@@ -8,7 +8,7 @@ from vllm.v1.core.kv_cache_utils import hash_block_tokens
def make_request(request_id, prompt_token_ids):
return Request(
request_id=request_id,
inputs=DecoderOnlyInputs(prompt_token_ids=prompt_token_ids),
inputs=token_inputs(prompt_token_ids=prompt_token_ids),
sampling_params=SamplingParams(max_tokens=17),
eos_token_id=100,
arrival_time=0,