Improve multimodal hasher performance for re-used Image prompts (#22825)

Signed-off-by: Staszek Pasko <staszek@gmail.com>
This commit is contained in:
Staszek Paśko
2025-08-15 14:32:56 +02:00
committed by GitHub
parent 49252cf59e
commit 22341b996e
2 changed files with 26 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import uuid
from pathlib import Path
import numpy as np
@@ -72,3 +73,22 @@ def test_hash_non_contiguous_array():
hasher = MultiModalHasher
# Both should be hashable and produce the same hashes
assert hasher.hash_kwargs(data=arr) == hasher.hash_kwargs(data=arr_c)
def test_hash_image_exif_id():
    """Check how the EXIF ImageID tag influences the multimodal hash.

    Two expectations are pinned:

    * an image whose ImageID tag holds a ``uuid.UUID`` hashes the same as
      the raw bytes of that UUID, i.e. the tag is used instead of the
      image data;
    * an image whose ImageID tag holds something that is not a UUID hashes
      the same as an untagged copy of the same file, i.e. the hasher falls
      back to the image data.
    """
    hasher = MultiModalHasher

    # Small in-memory image tagged with a genuine UUID.
    image_with_uuid = Image.new("1", size=(10, 20))
    image_uuid = uuid.uuid4()
    image_with_uuid.getexif()[Image.ExifTags.Base.ImageID] = image_uuid

    # first image has UUID in ImageID, so it should hash to that UUID
    assert hasher.hash_kwargs(image=image_with_uuid) == hasher.hash_kwargs(
        image=image_uuid.bytes)

    # Open the same asset twice: one copy gets a bogus ImageID, the other is
    # left untouched as the reference. The context manager closes both file
    # handles; hashing must happen inside it because PIL loads pixel data
    # lazily from the open file.
    asset_path = ASSETS_DIR / "image1.png"
    with Image.open(asset_path) as image_bad_id, \
            Image.open(asset_path) as image_reference:
        image_bad_id.getexif()[Image.ExifTags.Base.ImageID] = "Not a UUID"

        # second image has non-UUID in ImageID, so it should hash to the
        # image data
        assert hasher.hash_kwargs(image=image_bad_id) == hasher.hash_kwargs(
            image=image_reference)