Fix per file ruff ignores related to line length (#26262)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-06 06:12:40 +01:00
parent 91ac7f764d
commit 6c04638214
65 changed files with 301 additions and 291 deletions
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -130,14 +130,14 @@ VLM_TEST_SETTINGS = {
        dtype="bfloat16",
        marks=[
            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
-        ],  # noqa: E501
+        ],
    ),
    "qwen2_5_vl": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -149,8 +149,8 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2.5-Omni-3B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",
+        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",
        max_model_len=4096,
        max_num_seqs=2,
        num_logprobs=6 if current_platform.is_cpu() else 5,
@@ -181,7 +181,7 @@ VLM_TEST_SETTINGS = {
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        image_size_factors=[(0.25, 0.5, 1.0)],
@@ -213,7 +213,7 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -237,10 +237,10 @@ VLM_TEST_SETTINGS = {
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<vlm_image>Please describe the image shortly.",
-                "cherry_blossom": "<vlm_image>Please infer the season with reason.",  # noqa: E501
+                "cherry_blossom": "<vlm_image>Please infer the season with reason.",
            }
        ),
-        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",  # noqa: E501
+        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",
        stop_str=["<|im_end|>"],
        image_size_factors=[(0.10, 0.15)],
        max_tokens=64,
@@ -252,11 +252,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
-                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+                "stop_sign": "<image>What's the content in the center of the image?",
+                "cherry_blossom": "<image>What is the season?",
            }
        ),
-        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -268,11 +268,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
-                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+                "stop_sign": "<image>What's the content in the center of the image?",
+                "cherry_blossom": "<image>What is the season?",
            }
        ),
-        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -311,14 +311,14 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
            }
        ),
        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
        patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
        hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
-        stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],  # noqa: E501
+        stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],
        image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
    ),
    "fuyu": VLMTestInfo(
@@ -342,7 +342,7 @@ VLM_TEST_SETTINGS = {
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
-                "cherry_blossom": "<start_of_image>What is the season?",  # noqa: E501
+                "cherry_blossom": "<start_of_image>What is the season?",
            }
        ),
        multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.",  # noqa: E501
@@ -356,7 +356,7 @@ VLM_TEST_SETTINGS = {
    "glm4v": VLMTestInfo(
        models=["zai-org/glm-4v-9b"],
        test_type=VLMTestType.IMAGE,
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
@@ -377,9 +377,9 @@ VLM_TEST_SETTINGS = {
    "glm4_1v": VLMTestInfo(
        models=["zai-org/GLM-4.1V-9B-Thinking"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
+        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
+        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
        max_model_len=2048,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [151329, 151336, 151338],
@@ -410,10 +410,10 @@ VLM_TEST_SETTINGS = {
            "h2oai/h2ovl-mississippi-2b",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
@@ -444,7 +444,7 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
@@ -529,7 +529,7 @@ VLM_TEST_SETTINGS = {
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
@@ -583,7 +583,7 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
-        ),  # noqa: E501
+        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
        # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
@@ -598,7 +598,7 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
-        ),  # noqa: E501
+        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
    ),
@@ -627,7 +627,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
@@ -640,7 +640,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis2-1B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
@@ -652,7 +652,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis2.5-2B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        video_idx_to_prompt=lambda idx: "<video>\n",
        max_model_len=4096,
        max_num_seqs=2,
@@ -701,8 +701,8 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2-VL-2B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
@@ -717,11 +717,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<｜begin▁of▁sentence｜><｜User｜>\n{img_prompt}<｜Assistant｜><think>\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
-        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",  # noqa: E501
+        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.skyworkr1v_patch_hf_runner,
@@ -754,8 +754,8 @@ VLM_TEST_SETTINGS = {
            VLMTestType.VIDEO,
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -816,7 +816,7 @@ VLM_TEST_SETTINGS = {
        auto_cls=AutoModelForImageTextToText,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
--- a/tests/models/multimodal/generation/test_ultravox.py
+++ b/tests/models/multimodal/generation/test_ultravox.py
@@ -170,7 +170,7 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
                ],
                {
                    "type": "text",
-                    "text": f"What's happening in these {len(audio_assets)} audio clips?",
+                    "text": f"What's happening in these {len(audio_assets)} audio clips?",  # noqa: E501
                },
            ],
        }
--- a/tests/models/multimodal/generation/test_voxtral.py
+++ b/tests/models/multimodal/generation/test_voxtral.py
@@ -101,16 +101,11 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
        return audio_dict

    audio_chunks = [asset_to_chunk(asset) for asset in audio_assets]
+    text = f"What's happening in these {len(audio_assets)} audio clips?"
    messages = [
        {
            "role": "user",
-            "content": [
-                *audio_chunks,
-                {
-                    "type": "text",
-                    "text": f"What's happening in these {len(audio_assets)} audio clips?",
-                },
-            ],
+            "content": [*audio_chunks, {"type": "text", "text": text}],
        }
    ]

--- a/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
+++ b/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
@@ -102,8 +102,8 @@ def multi_video_multi_aspect_ratio_inputs(
 def different_patch_input_cases_internvl():
    images = [asset.pil_image.resize((896, 896)) for asset in IMAGE_ASSETS]
    formatter = (
-        lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n"
-    )  # noqa: E501
+        lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n"  # noqa: E501
+    )
    single_img_prompts = [
        "<image>\nWhat's the content in the center of the image?",
        "<image>\nWhat is the season?",