[Core][Multimodal] Track encode cache entries by mm_hash and enable embedding sharing between requests (#22711)

Signed-off-by: knlnguyen1802 <knlnguyen1802@gmail.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: knlnguyen1802 <knlnguyen1802@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
2025-08-25 15:41:17 +08:00
parent 712d0f88d8
commit d765cf01fe
12 changed files with 365 additions and 154 deletions
--- a/tests/v1/core/utils.py
+++ b/tests/v1/core/utils.py
@@ -143,7 +143,11 @@ def create_requests(
            mm_position = mm_positions[i]
            mm_item = MultiModalKwargsItem.dummy("dummy_m")
            mm_kwargs = [mm_item] * len(mm_position)
-            mm_hashes = ["hash"] * len(mm_position)
+            # Dummy hash for each mm item should be unique
+            # since encoder cache tracks entries by hash
+            mm_hashes = [
+                "hash" + str(i) + "_" + str(j) for j in range(len(mm_position))
+            ]
        else:
            mm_position = None
            mm_kwargs = None