[Core] Use individual MM items in P0/P1 cache and model runner (#22570)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-08-13 22:18:07 +08:00
parent 20d65aa755
commit 19b927e52d
24 changed files with 549 additions and 486 deletions
--- a/tests/v1/engine/test_engine_core.py
+++ b/tests/v1/engine/test_engine_core.py
@@ -35,7 +35,7 @@ def make_request() -> EngineCoreRequest:
    return EngineCoreRequest(
        request_id=str(uuid.uuid4()),
        prompt_token_ids=PROMPT_TOKENS,
-        mm_inputs=None,
+        mm_kwargs=None,
        mm_hashes=None,
        mm_placeholders=None,
        sampling_params=SamplingParams(),