[Core] Use individual MM items in P0/P1 cache and model runner (#22570)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -203,7 +203,7 @@ def _construct_cached_request_state(req_id_suffix: int):
|
||||
prompt_token_ids=prompt_token_ids,
|
||||
sampling_params=_create_sampling_params(),
|
||||
pooling_params=None,
|
||||
-mm_inputs=[],
|
||||
+mm_kwargs=[],
|
||||
mm_positions=[],
|
||||
block_ids=([], ),
|
||||
generator=None,
|
||||
|
||||
@@ -120,7 +120,7 @@ def _schedule_new_request(*req_ids: str) -> SchedulerOutput:
|
||||
NewRequestData(
|
||||
req_id=req_id,
|
||||
prompt_token_ids=[1, 2, 3],
|
||||
-mm_inputs=[],
|
||||
+mm_kwargs=[],
|
||||
mm_hashes=[],
|
||||
mm_positions=[],
|
||||
sampling_params=SamplingParams(),
|
||||
|
||||
Reference in New Issue
Block a user