Re-submit: Fix: Proper RGBA -> RGB conversion for PIL images. (#18569)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua
2025-05-22 18:59:18 -07:00
committed by GitHub
parent 46791e1b4b
commit 04eb88dc80
15 changed files with 89 additions and 20 deletions

View File

@@ -12,7 +12,7 @@ from transformers import AutoTokenizer
from vllm.assets.image import ImageAsset
from vllm.lora.request import LoRARequest
-from vllm.multimodal.image import rescale_image_size
+from vllm.multimodal.image import convert_image_mode, rescale_image_size
from vllm.platforms import current_platform
from vllm.sequence import SampleLogprobs
@@ -267,7 +267,7 @@ def test_vision_speech_models(hf_runner, vllm_runner, model, dtype: str,
# use the example speech question so that the model outputs are reasonable
audio = librosa.load(speech_question, sr=None)
-image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
+image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
inputs_vision_speech = [
(