[VLM] Support multimodal inputs for Florence-2 models (#13320)

This commit is contained in:
Isotr0py
2025-02-27 18:06:41 +08:00
committed by GitHub
parent 788f284b53
commit edf309ebbe
13 changed files with 1075 additions and 114 deletions

View File

@@ -29,8 +29,8 @@ def _test_processing_correctness(
model_config = ModelConfig(
model_id,
task="auto",
-tokenizer=model_id,
-tokenizer_mode="auto",
+tokenizer=model_info.tokenizer or model_id,
+tokenizer_mode=model_info.tokenizer_mode,
trust_remote_code=model_info.trust_remote_code,
seed=0,
dtype="float16",
@@ -151,6 +151,7 @@ def _test_processing_correctness(
"Salesforce/blip2-opt-2.7b",
"facebook/chameleon-7b",
"deepseek-ai/deepseek-vl2-tiny",
+"microsoft/Florence-2-base",
"adept/fuyu-8b",
"THUDM/glm-4v-9b",
"h2oai/h2ovl-mississippi-800m",