[VLM] Support multimodal inputs for Florence-2 models (#13320)
This commit is contained in:
@@ -29,8 +29,8 @@ def _test_processing_correctness(
|
||||
model_config = ModelConfig(
|
||||
model_id,
|
||||
task="auto",
|
||||
-tokenizer=model_id,
|
||||
-tokenizer_mode="auto",
|
||||
+tokenizer=model_info.tokenizer or model_id,
|
||||
+tokenizer_mode=model_info.tokenizer_mode,
|
||||
trust_remote_code=model_info.trust_remote_code,
|
||||
seed=0,
|
||||
dtype="float16",
|
||||
@@ -151,6 +151,7 @@ def _test_processing_correctness(
|
||||
"Salesforce/blip2-opt-2.7b",
|
||||
"facebook/chameleon-7b",
|
||||
"deepseek-ai/deepseek-vl2-tiny",
|
||||
+"microsoft/Florence-2-base",
|
||||
"adept/fuyu-8b",
|
||||
"THUDM/glm-4v-9b",
|
||||
"h2oai/h2ovl-mississippi-800m",
|
||||
|
||||
Reference in New Issue
Block a user