[Misc] Automatically resolve HF processor init kwargs (#22005)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-01 13:44:10 +08:00
committed by GitHub
parent ad57f23f6a
commit 82de9b9d46
40 changed files with 334 additions and 727 deletions

View File

@@ -702,13 +702,38 @@ VLM_TEST_SETTINGS = {
"smolvlm": VLMTestInfo(
models=["HuggingFaceTB/SmolVLM2-2.2B-Instruct"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt:f"<|im_start|>User:{img_prompt}<end_of_utterance>\nAssistant:", # noqa: E501
prompt_formatter=lambda img_prompt: f"<|im_start|>User:{img_prompt}<end_of_utterance>\nAssistant:", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>",
max_model_len=8192,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
hf_output_post_proc=model_utils.smolvlm_trunc_hf_output,
),
"tarsier": VLMTestInfo(
models=["omni-research/Tarsier-7b"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"USER: {img_prompt} ASSISTANT:",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
patch_hf_runner=model_utils.tarsier_patch_hf_runner,
),
"tarsier2": VLMTestInfo(
models=["omni-research/Tarsier2-Recap-7b"],
test_type=(
VLMTestType.IMAGE,
VLMTestType.MULTI_IMAGE,
VLMTestType.VIDEO,
),
prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.skip("Model initialization hangs")],
),
### Tensor parallel / multi-gpu broadcast tests
"chameleon-broadcast": VLMTestInfo(
models=["facebook/chameleon-7b"],