[Renderer] Move InputPreprocessor into Renderer (1/2) (#34510)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Cyrus Leung
2026-02-15 02:14:21 +08:00
committed by GitHub
parent b3c14229b0
commit 73391a1baa
39 changed files with 456 additions and 458 deletions

View File

@@ -194,14 +194,23 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
*,
mm_uuids: MultiModalUUIDDict | None = None,
) -> MultiModalInputs:
if prompt and mm_items:
raise ValueError(
"Siglip accepts text-only or image-only inputs, not both! "
"Image-only inputs means passing an image with an empty text "
"prompt."
)
if mm_items:
if isinstance(prompt, str):
if len(prompt) > 0:
raise ValueError(
"SigLIP accepts text-only or image-only inputs, not both! "
"You must pass an image with an empty text prompt."
)
else:
special_tokens = self.info.get_tokenizer().all_special_ids
if all(tok in special_tokens for tok in prompt):
prompt = []
else:
raise ValueError(
"SigLIP accepts text-only or image-only inputs, not both! "
"You must pass an image with an empty token prompt."
)
# For multi-modal data, the prompt after processing should
# only contain the image token
tokenization_kwargs = {