[Multimodal][Core] Optimize multimodal preprocessing cache by hashing image bytes instead of pixel values (#29621)
Signed-off-by: Rahul Steiger <rasteiger@ethz.ch> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -59,6 +59,7 @@ from vllm.distributed import (
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logprobs import Logprob
|
||||
from vllm.multimodal.base import MediaWithBytes
|
||||
from vllm.multimodal.utils import fetch_image
|
||||
from vllm.outputs import RequestOutput
|
||||
from vllm.sampling_params import BeamSearchParams
|
||||
@@ -1389,7 +1390,11 @@ class LocalAssetServer:
|
||||
return f"{self.base_url}/{name}"
|
||||
|
||||
def get_image_asset(self, name: str) -> Image.Image:
|
||||
return fetch_image(self.url_for(name))
|
||||
image = fetch_image(self.url_for(name))
|
||||
# Unwrap MediaWithBytes if present
|
||||
if isinstance(image, MediaWithBytes):
|
||||
image = image.media
|
||||
return image
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
|
||||
Reference in New Issue
Block a user