[Core] Use individual MM items in P0/P1 cache and model runner (#22570)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -203,7 +203,7 @@ def _construct_cached_request_state(req_id_suffix: int):
|
||||
prompt_token_ids=prompt_token_ids,
|
||||
sampling_params=_create_sampling_params(),
|
||||
pooling_params=None,
|
||||
mm_inputs=[],
|
||||
mm_kwargs=[],
|
||||
mm_positions=[],
|
||||
block_ids=([], ),
|
||||
generator=None,
|
||||
|
||||
Reference in New Issue
Block a user