[Core] Use individual MM items in P0/P1 cache and model runner (#22570)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-13 22:18:07 +08:00
committed by GitHub
parent 20d65aa755
commit 19b927e52d
24 changed files with 549 additions and 486 deletions

View File

@@ -203,7 +203,7 @@ def _construct_cached_request_state(req_id_suffix: int):
prompt_token_ids=prompt_token_ids,
sampling_params=_create_sampling_params(),
pooling_params=None,
-mm_inputs=[],
+mm_kwargs=[],
mm_positions=[],
block_ids=([], ),
generator=None,