[Bugfix] Make siglip/clip compatible with transformers v5 (#37200)

Signed-off-by: raushan <raushan@huggingface.co>
This commit is contained in:
Raushan Turganbay
2026-03-16 17:48:18 +01:00
committed by GitHub
parent 6682c231fa
commit 55e6d3d5c0
2 changed files with 12 additions and 4 deletions

View File

@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from transformers import CLIPModel
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
@@ -50,13 +51,16 @@ def _run_test(
if "pixel_values" in inputs:
pooled_output = hf_model.model.get_image_features(
pixel_values=inputs.pixel_values,
).squeeze(0)
)
else:
pooled_output = hf_model.model.get_text_features(
input_ids=inputs.input_ids,
attention_mask=inputs.attention_mask,
).squeeze(0)
)
if not isinstance(pooled_output, torch.Tensor):
pooled_output = pooled_output.pooler_output
pooled_output = pooled_output.squeeze(0)
all_outputs.append(pooled_output.tolist())
hf_outputs = all_outputs

View File

@@ -4,6 +4,7 @@
from typing import Any
import pytest
import torch
from transformers import SiglipModel
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
@@ -68,12 +69,15 @@ def _run_test(
if "pixel_values" in inputs:
pooled_output = hf_model.model.get_image_features(
pixel_values=inputs.pixel_values,
).squeeze(0)
)
else:
pooled_output = hf_model.model.get_text_features(
input_ids=inputs.input_ids,
).squeeze(0)
)
if not isinstance(pooled_output, torch.Tensor):
pooled_output = pooled_output.pooler_output
pooled_output = pooled_output.squeeze(0)
all_outputs.append(pooled_output.tolist())
hf_outputs = all_outputs