Accept NumPy .npy payloads in ImageEmbeddingMediaIO.

Review notes for this revision of the patch:
* load_bytes and load_file both pick the decoder from the payload's magic
  prefix, so a NumPy file is recognised whatever its extension.
* np.load is always called with allow_pickle=False because embeddings can
  come from untrusted clients.
* The sparse-tensor integrity comment in load_file is kept.
* The post-image blob ids on the "index" lines were not regenerated after
  these edits; treat them as informational only.

diff --git a/tests/renderers/test_sparse_tensor_validation.py b/tests/renderers/test_sparse_tensor_validation.py
index 6b570f3c9..5c51cd30a 100644
--- a/tests/renderers/test_sparse_tensor_validation.py
+++ b/tests/renderers/test_sparse_tensor_validation.py
@@ -7,6 +7,7 @@ out-of-bounds memory writes during to_dense() operations.
 
 import io
 
+import numpy as np
 import pybase64 as base64
 import pytest
 import torch
@@ -190,6 +191,73 @@ class TestImageEmbedsValidation:
         with pytest.raises((RuntimeError, ValueError)):
             io_handler.load_bytes(buffer.read())
 
+    def test_valid_numpy_tensor_accepted(self):
+        """numpy .npy format should load and return correct tensor."""
+        io_handler = ImageEmbeddingMediaIO()
+
+        arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
+        buf = io.BytesIO()
+        np.save(buf, arr)
+        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+        result = io_handler.load_base64("", encoded)
+        assert isinstance(result, torch.Tensor)
+        assert result.shape == torch.Size([2, 3])
+        assert result.dtype == torch.float32
+        assert torch.allclose(result, torch.from_numpy(arr))
+
+    def test_numpy_int32_tensor_accepted(self):
+        """numpy int32 arrays should round-trip correctly."""
+
+        io_handler = ImageEmbeddingMediaIO()
+
+        arr = np.arange(280, dtype=np.int32)
+        buf = io.BytesIO()
+        np.save(buf, arr)
+        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+        result = io_handler.load_base64("", encoded)
+        assert result.dtype == torch.int32
+        assert result.shape == torch.Size([280])
+        assert (result == torch.from_numpy(arr)).all()
+
+    def test_load_file_numpy_tensor_accepted(self, tmp_path):
+        """numpy .npy files should load correctly via load_file."""
+
+        io_handler = ImageEmbeddingMediaIO()
+
+        arr = np.array([[1.5, 2.5], [3.5, 4.5]], dtype=np.float32)
+        npy_path = tmp_path / "image_embeds.npy"
+        np.save(npy_path, arr)
+
+        result = io_handler.load_file(npy_path)
+        assert isinstance(result, torch.Tensor)
+        assert result.shape == torch.Size([2, 2])
+        assert result.dtype == torch.float32
+        assert torch.allclose(result, torch.from_numpy(arr))
+
+    def test_load_file_detects_numpy_by_content(self, tmp_path):
+        """load_file should sniff the .npy magic, not trust the suffix."""
+        io_handler = ImageEmbeddingMediaIO()
+
+        arr = np.array([[1.5, 2.5], [3.5, 4.5]], dtype=np.float32)
+        bin_path = tmp_path / "image_embeds.bin"
+        with open(bin_path, "wb") as f:
+            np.save(f, arr)
+
+        result = io_handler.load_file(bin_path)
+        assert torch.equal(result, torch.from_numpy(arr))
+
+    def test_numpy_object_array_rejected(self):
+        """Pickled object arrays must not be deserialized from user input."""
+        io_handler = ImageEmbeddingMediaIO()
+
+        buf = io.BytesIO()
+        np.save(buf, np.array([{"a": 1}], dtype=object), allow_pickle=True)
+
+        with pytest.raises(ValueError):
+            io_handler.load_bytes(buf.getvalue())
+
 
 class TestAudioEmbedsValidation:
     """Test sparse tensor validation in audio embeddings (Chat API)."""
diff --git a/vllm/multimodal/media/image.py b/vllm/multimodal/media/image.py
index 0390be250..ea4bf7b01 100644
--- a/vllm/multimodal/media/image.py
+++ b/vllm/multimodal/media/image.py
@@ -4,6 +4,7 @@
 from io import BytesIO
 from pathlib import Path
 
+import numpy as np
 import pybase64
 import torch
 from PIL import Image
@@ -13,6 +14,10 @@ from vllm.utils.serial_utils import tensor2base64
 from ..image import convert_image_mode, rgba_to_rgb
 from .base import MediaIO, MediaWithBytes
 
+# Magic string at the start of every NumPy ``.npy`` payload; see
+# https://numpy.org/devdocs/reference/generated/numpy.lib.format.html#format-version-1-0
+MAGIC_NUMPY_PREFIX = b"\x93NUMPY"
+
 
 class ImageMediaIO(MediaIO[Image.Image]):
     """Configuration values can be user-provided either by --media-io-kwargs or
@@ -104,7 +109,8 @@ class ImageEmbeddingMediaIO(MediaIO[torch.Tensor]):
     def __init__(self) -> None:
         super().__init__()
 
-    def load_bytes(self, data: bytes) -> torch.Tensor:
+    def _load_pickled_torch(self, data: bytes) -> torch.Tensor:
+        """Deserialize a ``torch.save`` payload and return it as a dense tensor."""
         buffer = BytesIO(data)
         # Enable sparse tensor integrity checks to prevent out-of-bounds
         # writes from maliciously crafted tensors
@@ -112,12 +118,32 @@ class ImageEmbeddingMediaIO(MediaIO[torch.Tensor]):
             tensor = torch.load(buffer, weights_only=True)
             return tensor.to_dense()
 
+    def _load_numpy(self, data: bytes) -> torch.Tensor:
+        """Deserialize a NumPy ``.npy`` payload into a tensor."""
+        with BytesIO(data) as buffer:
+            # Payloads can come from untrusted clients, so never let NumPy
+            # unpickle object arrays.
+            return torch.from_numpy(np.load(buffer, allow_pickle=False))
+
+    def load_bytes(self, data: bytes) -> torch.Tensor:
+        """Decode an embedding, picking the decoder from the payload's magic."""
+        if data[: len(MAGIC_NUMPY_PREFIX)] == MAGIC_NUMPY_PREFIX:
+            return self._load_numpy(data)
+
+        return self._load_pickled_torch(data)
+
     def load_base64(self, media_type: str, data: str) -> torch.Tensor:
         return self.load_bytes(pybase64.b64decode(data, validate=True))
 
     def load_file(self, filepath: Path) -> torch.Tensor:
+        """Load an embedding file, sniffing its header like ``load_bytes``."""
+        with filepath.open("rb") as fh:
+            header = fh.read(len(MAGIC_NUMPY_PREFIX))
+        if header == MAGIC_NUMPY_PREFIX:
+            return torch.from_numpy(np.load(filepath, allow_pickle=False))
+
         # Enable sparse tensor integrity checks to prevent out-of-bounds
         # writes from maliciously crafted tensors
         with torch.sparse.check_sparse_tensor_invariants():
             tensor = torch.load(filepath, weights_only=True)
             return tensor.to_dense()