[Bugfix] Fix base64 JPEG video frames returning empty metadata (#37301)

Signed-off-by: Yufeng He <40085740+universeplayer@users.noreply.github.com>
Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Yufeng He <40085740+universeplayer@users.noreply.github.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Yufeng He
2026-03-18 21:40:03 +08:00
committed by GitHub
parent 98b09ddc27
commit 918b7890a1
2 changed files with 66 additions and 2 deletions

View File

@@ -1,9 +1,11 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import io
from pathlib import Path
import numpy as np
import numpy.typing as npt
import pybase64
import pytest
from PIL import Image
@@ -235,3 +237,53 @@ def test_video_media_io_backend_env_var_fallback(monkeypatch: pytest.MonkeyPatch
frames_missing, metadata_missing = videoio_missing.load_bytes(b"test")
np.testing.assert_array_equal(frames_missing, FAKE_OUTPUT_2)
assert metadata_missing["video_backend"] == "test_video_backend_override_2"
def test_load_base64_jpeg_returns_metadata():
    """Ensure ``load_base64("video/jpeg", ...)`` returns populated metadata.

    Regression guard for PR #37301: a comma-separated sequence of
    base64-encoded JPEG frames used to come back with an empty metadata
    dict, which broke downstream consumers that read fields such as
    ``total_num_frames`` and ``fps``.
    """
    num_test_frames = 3
    frame_size = (8, 8)  # (width, height) of each synthetic frame

    def encode_frame(index: int) -> str:
        """Render one tiny solid-colour JPEG and return it base64-encoded."""
        with io.BytesIO() as stream:
            picture = Image.new("RGB", frame_size, color=(index * 80, 0, 0))
            picture.save(stream, format="JPEG")
            return pybase64.b64encode(stream.getvalue()).decode("ascii")

    # The JPEG-sequence payload is the encoded frames joined by commas.
    payload = ",".join(encode_frame(index) for index in range(num_test_frames))

    videoio = VideoMediaIO(ImageMediaIO(), num_frames=num_test_frames)
    frames, metadata = videoio.load_base64("video/jpeg", payload)

    # Leading axis of the stacked frame array is the frame count.
    assert frames.shape[0] == num_test_frames

    # Every key that consumers depend on has to be there.
    required_keys = {
        "total_num_frames",
        "fps",
        "duration",
        "video_backend",
        "frames_indices",
        "do_sample_frames",
    }
    assert required_keys.issubset(metadata.keys()), (
        f"Missing metadata keys: {required_keys - metadata.keys()}"
    )

    expected_indices = list(range(num_test_frames))
    assert metadata["total_num_frames"] == num_test_frames
    assert metadata["video_backend"] == "jpeg_sequence"
    assert metadata["frames_indices"] == expected_indices
    assert metadata["do_sample_frames"] is False

    # No fps was supplied, so it defaults to 1 and duration equals frame count.
    assert metadata["fps"] == 1.0
    assert metadata["duration"] == float(num_test_frames)

View File

@@ -80,9 +80,21 @@ class VideoMediaIO(MediaIO[tuple[npt.NDArray, dict[str, Any]]]):
"image/jpeg",
)
return np.stack(
frames = np.stack(
[np.asarray(load_frame(frame_data)) for frame_data in data.split(",")]
), {}
)
total = int(frames.shape[0])
fps = float(self.kwargs.get("fps", 1))
duration = total / fps if fps > 0 else 0.0
metadata = {
"total_num_frames": total,
"fps": fps,
"duration": duration,
"video_backend": "jpeg_sequence",
"frames_indices": list(range(total)),
"do_sample_frames": False,
}
return frames, metadata
return self.load_bytes(pybase64.b64decode(data))