[Misc] IO Processor plugins for pooling models (#22820)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
2025-09-01 07:07:12 +01:00
parent 437c3ce026
commit 1cb39dbcdd
25 changed files with 1183 additions and 43 deletions
--- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/__init__.py
+++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+def register_prithvi_india():
+    """Return the dotted path of ``PrithviMultimodalDataProcessorIndia``."""
+    qualified_name = "prithvi_io_processor.prithvi_processor.PrithviMultimodalDataProcessorIndia"  # noqa: E501
+    return qualified_name
+
+
+def register_prithvi_valencia():
+    """Return the dotted path of ``PrithviMultimodalDataProcessorValencia``."""
+    qualified_name = "prithvi_io_processor.prithvi_processor.PrithviMultimodalDataProcessorValencia"  # noqa: E501
+    return qualified_name
--- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py
+++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py
@@ -0,0 +1,449 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import base64
+import datetime
+import os
+import tempfile
+import urllib.request
+from collections.abc import AsyncGenerator, Sequence
+from typing import Any, Optional, Union
+
+import albumentations
+import numpy as np
+import rasterio
+import regex as re
+import torch
+from einops import rearrange
+from terratorch.datamodules import Sen1Floods11NonGeoDataModule
+
+from vllm.config import VllmConfig
+from vllm.entrypoints.openai.protocol import (IOProcessorRequest,
+                                              IOProcessorResponse)
+from vllm.inputs.data import PromptType
+from vllm.logger import init_logger
+from vllm.outputs import PoolingRequestOutput
+from vllm.plugins.io_processors.interface import (IOProcessor,
+                                                  IOProcessorInput,
+                                                  IOProcessorOutput)
+
+from .types import DataModuleConfig, ImagePrompt, ImageRequestOutput
+
+# Module-level logger, named after this module.
+logger = init_logger(__name__)
+
+# Pixel value that load_image() treats as "no data", and the value it
+# substitutes for such pixels when normalising with mean/std.
+NO_DATA, NO_DATA_FLOAT = -9999, 0.0001
+# NOTE(review): OFFSET and PERCENTILE are not referenced by the code visible
+# in this module -- confirm whether they are still needed.
+OFFSET, PERCENTILE = 0, 99
+
+# Band indices selected from each input raster unless a processor overrides
+# its ``indices`` attribute.
+DEFAULT_INPUT_INDICES = list(range(6))
+
+# Settings for the Sen1Floods11NonGeoDataModule built by
+# PrithviMultimodalDataProcessor.__init__, which forwards data_root,
+# batch_size, num_workers, bands, drop_last and test_transform.
+datamodule_config: DataModuleConfig = {
+    "bands": ["BLUE", "GREEN", "RED", "NIR_NARROW", "SWIR_1", "SWIR_2"],
+    "batch_size": 16,
+    "constant_scale": 0.0001,
+    "data_root": "/dccstor/geofm-finetuning/datasets/sen1floods11",
+    "drop_last": True,
+    "no_data_replace": 0.0,
+    "no_label_replace": -1,
+    "num_workers": 8,
+    "test_transform": [
+        # Resize every tile to 448x448.
+        albumentations.Resize(
+            height=448,
+            width=448,
+            interpolation=1,
+            always_apply=False,
+            p=1,
+        ),
+        # Convert the resized array to a torch tensor.
+        albumentations.pytorch.ToTensorV2(
+            transpose_mask=False,
+            always_apply=True,
+            p=1.0,
+        ),
+    ],
+}
+
+
+def _write_bands(path: str, image, meta: dict) -> None:
+    """Write every band of *image* to a new raster file at *path*."""
+    with rasterio.open(path, "w", **meta) as dest:
+        for band_idx in range(image.shape[0]):
+            # The second argument is the band number, which starts at 1.
+            dest.write(image[band_idx, :, :], band_idx + 1)
+
+
+def save_geotiff(image: torch.Tensor, meta: dict,
+                 out_format: str) -> str | bytes:
+    """Save a multi-band image as a GeoTIFF.
+
+    Args:
+        image: array-like with shape (bands, height, width); it is indexed
+            as ``image[band, :, :]``.
+        meta: keyword arguments forwarded to ``rasterio.open`` in write mode.
+        out_format: ``"path"`` writes ``prediction.tiff`` into the current
+            working directory; ``"b64_json"`` writes a temporary file and
+            returns its content base64-encoded.
+
+    Returns:
+        The path of the written file for ``"path"``, or the base64-encoded
+        file content as ``bytes`` for ``"b64_json"``.
+
+    Raises:
+        ValueError: if *out_format* is neither ``"path"`` nor ``"b64_json"``.
+    """
+    if out_format == "path":
+        # NOTE: the file name is fixed, so successive calls from the same
+        # working directory overwrite each other's output.
+        file_path = os.path.join(os.getcwd(), "prediction.tiff")
+        _write_bands(file_path, image, meta)
+        return file_path
+
+    if out_format == "b64_json":
+        with tempfile.NamedTemporaryFile() as tmpfile:
+            # rasterio writes through its own handle, addressed by name.
+            _write_bands(tmpfile.name, image, meta)
+            # Read the finished file from the start through our handle.
+            tmpfile.seek(0)
+            return base64.b64encode(tmpfile.read())
+
+    raise ValueError("Unknown output format")
+
+
+def _convert_np_uint8(float_image: torch.Tensor):
+    """Scale *float_image* by 255 and return it as a ``uint8`` numpy array."""
+    return (float_image.numpy() * 255.0).astype(dtype=np.uint8)
+
+
+def _read_raster(path: str):
+    """Open the raster at *path* and return ``(img, meta, coords)``."""
+    with rasterio.open(path) as src:
+        img = src.read()
+        meta = src.meta
+        try:
+            coords = src.lnglat()
+        except Exception:
+            # Best effort: the coordinates could not be read.
+            coords = None
+    return img, meta, coords
+
+
+def read_geotiff(
+    file_path: Optional[str] = None,
+    path_type: Optional[str] = None,
+    file_data: Optional[bytes] = None,
+) -> tuple[np.ndarray, dict, tuple[float, float] | None]:
+    """Read all bands of a GeoTIFF and return image, metadata and coords.
+
+    The raster content is taken from *file_data* when it is given; otherwise
+    *file_path* is interpreted according to *path_type*.
+
+    Args:
+        file_path: a URL, a local path or a base64 string, depending on
+            *path_type*.
+        path_type: ``"url"``, ``"path"`` or ``"b64_json"``.
+        file_data: raw file content; takes precedence over *file_path*.
+
+    Returns:
+        A tuple ``(img, meta, coords)``: the array returned by the dataset's
+        ``read()``, its ``meta`` dict, and the result of ``lnglat()`` or
+        ``None`` when that call fails.
+
+    Raises:
+        ValueError: if every argument is ``None`` or the combination of
+            arguments is not supported.
+    """
+    if file_path is None and path_type is None and file_data is None:
+        raise ValueError("All input fields to read_geotiff are None")
+
+    payload: Optional[bytes] = None
+    if file_data is not None:
+        payload = file_data
+    elif file_path is not None and path_type == "url":
+        # NOTE(review): the URL is opened as given, and urlopen also accepts
+        # non-HTTP schemes such as file:// -- validate the scheme upstream if
+        # the value can come from an untrusted client.
+        with urllib.request.urlopen(file_path) as resp:
+            payload = resp.read()
+    elif file_path is not None and path_type == "b64_json":
+        payload = base64.b64decode(file_path)
+    elif not (file_path is not None and path_type == "path"):
+        raise ValueError("Wrong combination of parameters to read_geotiff")
+
+    if payload is None:
+        # Local path: read it directly, no temporary copy required.
+        return _read_raster(file_path)
+
+    with tempfile.NamedTemporaryFile() as tmpfile:
+        tmpfile.write(payload)
+        # rasterio reopens the file by name, so the buffered bytes must be
+        # on disk before it does; otherwise small payloads may be missing.
+        tmpfile.flush()
+        return _read_raster(tmpfile.name)
+
+
+# Matches YYYYDDDTHHMMSS or YYYYMMDDTHHMMSS stamps embedded in a file name.
+_TIMESTAMP_RE = re.compile(r"(\d{7,8}T\d{6})")
+
+
+def _parse_temporal_coords(name: str) -> Optional[list[int]]:
+    """Return ``[year, day_of_year]`` parsed from *name*, or ``None``."""
+    match = _TIMESTAMP_RE.search(name)
+    if match is None:
+        return None
+    date_part = match.group(1).split("T")[0]
+    year = int(date_part[:4])
+    if len(date_part) == 7:
+        # YYYYDDD: the day of the year is given directly.
+        return [year, int(date_part[4:])]
+    # YYYYMMDD: parse together with the year so that leap years produce the
+    # right day of the year (parsing "%m%d" alone assumes a non-leap year).
+    parsed = datetime.datetime.strptime(date_part, "%Y%m%d")
+    return [year, parsed.timetuple().tm_yday]
+
+
+def load_image(
+    data: list[str],
+    path_type: str,
+    mean: Optional[list[float]] = None,
+    std: Optional[list[float]] = None,
+    indices: Optional[list[int]] = None,
+):
+    """Build an input example by loading every image listed in *data*.
+
+    Args:
+        data: one entry per image; each is handed to ``read_geotiff`` as
+            ``file_path``.
+        path_type: how the entries of *data* are interpreted by
+            ``read_geotiff``.
+        mean: per-band mean values; normalisation is applied only when both
+            *mean* and *std* are given.
+        std: per-band std values, see *mean*.
+        indices: band indices to keep; all bands are kept when ``None``.
+
+    Returns:
+        A tuple ``(imgs, temporal_coords, location_coords, metas)``:
+        a float32 array shaped (1, C, num_frames, H, W); the
+        ``[year, day_of_year]`` pairs found in the entries of *data*; the
+        coordinates of the images that provide them; and one meta dict per
+        image.
+    """
+    imgs = []
+    metas = []
+    temporal_coords = []
+    location_coords = []
+
+    for file in data:
+        img, meta, coords = read_geotiff(file_path=file, path_type=path_type)
+        # Rescaling (don't normalize on nodata)
+        img = np.moveaxis(img, 0, -1)  # channels last for rescaling
+        if indices is not None:
+            img = img[..., indices]
+        if mean is not None and std is not None:
+            img = np.where(img == NO_DATA, NO_DATA_FLOAT, (img - mean) / std)
+
+        imgs.append(img)
+        metas.append(meta)
+        if coords is not None:
+            location_coords.append(coords)
+
+        # Timestamp extraction is best effort: log and carry on.
+        try:
+            stamp = _parse_temporal_coords(file)
+        except Exception:
+            logger.exception("Could not extract timestamp for %s", file)
+        else:
+            if stamp is not None:
+                temporal_coords.append(stamp)
+
+    imgs = np.stack(imgs, axis=0)  # num_frames, H, W, C
+    imgs = np.moveaxis(imgs, -1, 0).astype("float32")  # C, num_frames, H, W
+    imgs = np.expand_dims(imgs, axis=0)  # add batch dim
+
+    return imgs, temporal_coords, location_coords, metas
+
+
+class PrithviMultimodalDataProcessor(IOProcessor):
+    """IO processor that tiles a GeoTIFF into prompts and rebuilds a GeoTIFF.
+
+    ``pre_process`` loads one raster, pads it to a multiple of ``img_size``
+    and cuts it into square windows, one prompt per window chunk.
+    ``post_process`` takes the per-window outputs, stitches them back
+    together, crops the padding and writes a single-band GeoTIFF.
+
+    The tiling geometry and raster metadata of each request are kept in
+    ``requests_cache`` under its request id and removed by ``post_process``.
+    They are also mirrored on the instance for callers that do not pass a
+    request id.
+    """
+
+    def __init__(self, vllm_config: VllmConfig):
+
+        super().__init__(vllm_config)
+
+        self.datamodule = Sen1Floods11NonGeoDataModule(
+            data_root=datamodule_config["data_root"],
+            batch_size=datamodule_config["batch_size"],
+            num_workers=datamodule_config["num_workers"],
+            bands=datamodule_config["bands"],
+            drop_last=datamodule_config["drop_last"],
+            test_transform=datamodule_config["test_transform"],
+        )
+        self.img_size = 512
+        self.h1 = 1
+        self.w1 = 1
+        self.original_h = 512
+        self.original_w = 512
+        self.batch_size = 1
+        self.meta_data = None
+        self.requests_cache: dict[str, dict[str, Any]] = {}
+        self.indices = DEFAULT_INPUT_INDICES
+
+    def parse_request(self, request: Any) -> IOProcessorInput:
+        """Turn a raw dict or an ``IOProcessorRequest`` into an ImagePrompt.
+
+        Raises:
+            ValueError: if the request, or its ``data`` field, cannot be
+                interpreted.
+        """
+        if isinstance(request, dict):
+            return ImagePrompt(**request)
+
+        if isinstance(request, IOProcessorRequest):
+            if not hasattr(request, "data"):
+                raise ValueError(
+                    "missing 'data' field in OpenAIBaseModel Request")
+
+            request_data = request.data
+            if isinstance(request_data, dict):
+                return ImagePrompt(**request_data)
+            raise ValueError("Unable to parse the request data")
+
+        raise ValueError("Unable to parse request")
+
+    def output_to_response(
+            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
+        """Wrap *plugin_output* in an ``IOProcessorResponse``."""
+        return IOProcessorResponse(
+            request_id=plugin_output.request_id,
+            data=plugin_output,
+        )
+
+    def pre_process(
+        self,
+        prompt: IOProcessorInput,
+        request_id: Optional[str] = None,
+        **kwargs,
+    ) -> Union[PromptType, Sequence[PromptType]]:
+        """Load the image described by *prompt* and split it into prompts.
+
+        Args:
+            prompt: provides ``data``, ``data_format`` and
+                ``out_data_format``.
+            request_id: when given, the state needed by ``post_process`` is
+                stored under this id.
+
+        Returns:
+            A list of prompt dicts, one per window chunk.
+
+        Raises:
+            ValueError: if no location coordinates can be read from the
+                image.
+        """
+        image_data = dict(prompt)
+
+        input_data, _, location_coords, meta_data = load_image(
+            data=[image_data["data"]],
+            indices=self.indices,
+            path_type=image_data["data_format"],
+        )
+
+        # Every prompt carries the coordinates as a tensor; fail with a
+        # clear message instead of an AttributeError on ``None`` below.
+        if not location_coords:
+            raise ValueError(
+                "Could not read location coordinates from the input image")
+        coords = torch.tensor(location_coords[0]).unsqueeze(0)
+
+        if input_data.mean() > 1:
+            input_data = input_data / 10000  # Convert to range 0-1
+
+        # Pad height and width up to the next multiple of img_size.
+        original_h, original_w = input_data.shape[-2:]
+        pad_h = (self.img_size - (original_h % self.img_size)) % self.img_size
+        pad_w = (self.img_size - (original_w % self.img_size)) % self.img_size
+        input_data = np.pad(
+            input_data,
+            ((0, 0), (0, 0), (0, 0), (0, pad_h), (0, pad_w)),
+            mode="reflect",
+        )
+
+        # Cut the padded image into non-overlapping img_size x img_size tiles.
+        batch = torch.tensor(input_data)
+        windows = batch.unfold(3, self.img_size,
+                               self.img_size).unfold(4, self.img_size,
+                                                     self.img_size)
+        h1, w1 = windows.shape[3:5]
+        windows = rearrange(
+            windows,
+            "b c t h1 w1 h w -> (b h1 w1) c t h w",
+            h=self.img_size,
+            w=self.img_size,
+        )
+
+        # Split into batches if number of windows > batch_size
+        num_batches = (windows.shape[0] // self.batch_size
+                       if windows.shape[0] > self.batch_size else 1)
+        windows = torch.tensor_split(windows, num_batches, dim=0)
+
+        prompts = []
+        for window in windows:
+            # Apply standardization
+            window = self.datamodule.test_transform(
+                image=window.squeeze().numpy().transpose(1, 2, 0))
+            window = self.datamodule.aug(window)["image"]
+            prompts.append({
+                "prompt_token_ids": [1],
+                "multi_modal_data": {
+                    "pixel_values": window.to(torch.float16)[0],
+                    "location_coords": coords.to(torch.float16),
+                },
+            })
+
+        # Mirror the state on the instance for callers without a request id.
+        self.meta_data = meta_data[0]
+        self.h1, self.w1 = h1, w1
+        self.original_h, self.original_w = original_h, original_w
+
+        # Keep a per-request copy so that another pre_process() running
+        # before this request's post_process() cannot clobber it. Stored
+        # last so that a failure above leaves no stale entry behind.
+        if request_id:
+            self.requests_cache[request_id] = {
+                "out_format": image_data["out_data_format"],
+                "meta_data": meta_data[0],
+                "h1": h1,
+                "w1": w1,
+                "original_h": original_h,
+                "original_w": original_w,
+            }
+
+        return prompts
+
+    async def pre_process_async(
+        self,
+        prompt: IOProcessorInput,
+        request_id: Optional[str] = None,
+        **kwargs,
+    ) -> Union[PromptType, Sequence[PromptType]]:
+        """Async entry point; delegates to ``pre_process``."""
+        return self.pre_process(prompt, request_id, **kwargs)
+
+    def post_process(
+        self,
+        model_output: Sequence[PoolingRequestOutput],
+        request_id: Optional[str] = None,
+        **kwargs,
+    ) -> IOProcessorOutput:
+        """Stitch the per-window outputs into one image and encode it.
+
+        Args:
+            model_output: one output per window, in the order the windows
+                were produced by ``pre_process``.
+            request_id: id used in ``pre_process``; its cached state is
+                consumed (removed) here. Without a cached entry the output
+                format is ``"b64_json"`` and the state mirrored on the
+                instance is used.
+
+        Raises:
+            ValueError: if no raster metadata is available.
+        """
+        # pop() rather than a lookup: each entry is needed once, and leaving
+        # it behind would make the cache grow with every request.
+        state: dict[str, Any] = {}
+        if request_id:
+            state = self.requests_cache.pop(request_id, None) or {}
+
+        out_format = state.get("out_format", "b64_json")
+        meta_data = state.get("meta_data", self.meta_data)
+        h1 = state.get("h1", self.h1)
+        w1 = state.get("w1", self.w1)
+        original_h = state.get("original_h", self.original_h)
+        original_w = state.get("original_w", self.original_w)
+
+        if not meta_data:
+            raise ValueError("No metadata available for the current task")
+
+        pred_imgs_list = []
+        for output in model_output:
+            y_hat = output.outputs.data.argmax(dim=1)
+            pred = torch.nn.functional.interpolate(
+                y_hat.unsqueeze(1).float(),
+                size=self.img_size,
+                mode="nearest",
+            )
+            pred_imgs_list.append(pred)
+
+        pred_imgs: torch.Tensor = torch.concat(pred_imgs_list, dim=0)
+
+        # Build images from patches
+        pred_imgs = rearrange(
+            pred_imgs,
+            "(b h1 w1) c h w -> b c (h1 h) (w1 w)",
+            h=self.img_size,
+            w=self.img_size,
+            b=1,
+            c=1,
+            h1=h1,
+            w1=w1,
+        )
+
+        # Cut padded area back to original size
+        pred_imgs = pred_imgs[..., :original_h, :original_w]
+
+        # Squeeze (batch size 1)
+        pred_imgs = pred_imgs[0]
+
+        # Work on a copy so the metadata read from the input stays untouched.
+        out_meta = dict(meta_data)
+        out_meta.update(count=1, dtype="uint8", compress="lzw", nodata=0)
+        out_data = save_geotiff(_convert_np_uint8(pred_imgs), out_meta,
+                                out_format)
+
+        return ImageRequestOutput(type=out_format,
+                                  format="tiff",
+                                  data=out_data,
+                                  request_id=request_id)
+
+    async def post_process_async(
+        self,
+        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
+        request_id: Optional[str] = None,
+        **kwargs,
+    ) -> IOProcessorOutput:
+        """Collect the ``(index, output)`` pairs and run ``post_process``.
+
+        The pairs are sorted by index first: the stitching in
+        ``post_process`` depends on the window order, and the generator is
+        not assumed to yield the outputs in that order.
+        """
+        indexed_output = [(idx, item) async for idx, item in model_output]
+        indexed_output.sort(key=lambda pair: pair[0])
+        collected_output = [item for _, item in indexed_output]
+        return self.post_process(collected_output, request_id, **kwargs)
+
+
+class PrithviMultimodalDataProcessorIndia(PrithviMultimodalDataProcessor):
+    """Base processor configured with band indices [1, 2, 3, 8, 11, 12]."""
+
+    def __init__(self, vllm_config: VllmConfig):
+        super().__init__(vllm_config)
+        # Replace the default band selection set by the base class.
+        india_bands = [1, 2, 3, 8, 11, 12]
+        self.indices = india_bands
+
+
+class PrithviMultimodalDataProcessorValencia(PrithviMultimodalDataProcessor):
+    """Base processor configured with band indices [0, 1, 2, 3, 4, 5]."""
+
+    def __init__(self, vllm_config: VllmConfig):
+        super().__init__(vllm_config)
+        # Give this instance its own list holding the first six band indices.
+        self.indices = list(range(6))
--- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py
+++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Any, Literal, Optional, TypedDict, Union
+
+import albumentations
+from pydantic import BaseModel
+
+
+class DataModuleConfig(TypedDict):
+    """Typed layout of the ``datamodule_config`` dict in prithvi_processor.
+
+    Only the key names and value types are fixed here; the values themselves
+    are defined where the dictionary is built.
+    """
+    # Band names, e.g. "BLUE" or "SWIR_1".
+    bands: list[str]
+    batch_size: int
+    constant_scale: float
+    # Dataset root directory handed to the data module.
+    data_root: str
+    drop_last: bool
+    no_data_replace: float
+    no_label_replace: int
+    num_workers: int
+    # albumentations transforms handed to the data module as test_transform.
+    test_transform: list[
+        albumentations.core.transforms_interface.BasicTransform]
+
+
+class ImagePrompt(BaseModel):
+    """Request payload: one input image plus the desired output encoding."""
+
+    data_format: Literal["b64_json", "bytes", "url", "path"]
+    """
+    This is the data type for the input image.
+    The processor in this plugin reads "url", "path" and "b64_json" inputs;
+    "path" is accepted here so that local files can actually be requested.
+    """
+
+    image_format: str
+    """
+    This is the image format (e.g., jpeg, png, etc.)
+    """
+
+    out_data_format: Literal["b64_json", "url", "path"]
+    """
+    This is the data type for the output image.
+    The processor in this plugin produces "path" and "b64_json" outputs;
+    "url" stays accepted for compatibility with existing requests.
+    """
+
+    data: Any
+    """
+    Input image data
+    """
+
+
+# Alias for the supported prompt types; ImagePrompt is the only one so far
+# (a one-member Union is the member itself).
+MultiModalPromptType = ImagePrompt
+
+
+class ImageRequestOutput(BaseModel):
+    """
+    The output data of an image request to vLLM.
+
+    Args:
+        type (str): How ``data`` is delivered: "path" or "b64_json".
+        format (str): The image format (e.g., jpeg, png, etc.)
+        data (str): The resulting data.
+        request_id (Optional[str]): Id of the originating request, if any.
+    """
+
+    type: Literal["path", "b64_json"]
+    format: str
+    data: str
+    request_id: Optional[str] = None
--- a/tests/plugins/prithvi_io_processor_plugin/setup.py
+++ b/tests/plugins/prithvi_io_processor_plugin/setup.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from setuptools import setup
+
+# Entry points under which the two registration functions are published;
+# each maps a plugin name to "<package>:<function>".
+_IO_PROCESSOR_PLUGINS = [
+    "prithvi_to_tiff_india = prithvi_io_processor:register_prithvi_india",  # noqa: E501
+    "prithvi_to_tiff_valencia = prithvi_io_processor:register_prithvi_valencia",  # noqa: E501
+]
+
+setup(
+    name="prithvi_io_processor_plugin",
+    version="0.1",
+    packages=["prithvi_io_processor"],
+    entry_points={"vllm.io_processor_plugins": _IO_PROCESSOR_PLUGINS},
+)