diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 4dab4b7d9..d9b7a2821 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -377,7 +377,7 @@ VLM_TEST_SETTINGS = { use_tokenizer_eos=True, vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output, num_logprobs=10, - image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], marks=[large_gpu_mark(min_gb=32)], ), "gemma3": VLMTestInfo( @@ -437,7 +437,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: [151329, 151336, 151338], num_logprobs=10, - image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], auto_cls=AutoModelForImageTextToText, marks=[large_gpu_mark(min_gb=32)], ), @@ -468,7 +468,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: [151329, 151336, 151338], num_logprobs=10, - image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], auto_cls=AutoModelForImageTextToText, marks=[large_gpu_mark(min_gb=32)], ),