Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor, committed by GitHub
Date: 2025-10-05 15:06:22 +01:00
Parent: 17edd8a807
Commit: d6953beb91
1508 changed files with 115244 additions and 94146 deletions
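
For readers who don't know the two tools, here is a minimal sketch of the style change this commit applies throughout the tree: yapf aligns continuation arguments with the opening parenthesis, while ruff's formatter (Black-style) uses a four-space hanging indent, one argument per line, and a trailing comma once a call exceeds the line limit. The render() helper below is hypothetical, invented purely for illustration; the actual configuration the commit adds is not shown in this excerpt.

def render(prompt: str, width: int, height: int, scale_factor: float) -> str:
    """Hypothetical helper, used only to illustrate the formatting change."""
    return f"{prompt}:{width}x{height}@{scale_factor}"

# Before (yapf): continuation lines aligned with the opening parenthesis.
before = render("cherry_blossom",
                width=1280,
                height=960,
                scale_factor=0.25)

# After (ruff format): hanging indent plus a trailing comma, which keeps
# the one-argument-per-line layout stable on later formatter runs.
after = render(
    "cherry_blossom",
    width=1280,
    height=960,
    scale_factor=0.25,
)

assert before == after  # only the layout changes; behavior is identical

Import sorting, previously isort's job, is covered by ruff's isort-compatible I lint rules (ruff check --select I --fix), and ruff format replaces yapf, so a single tool now handles both.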


@@ -17,31 +17,39 @@ from vllm.lora.request import LoRARequest
 from vllm.multimodal.image import convert_image_mode, rescale_image_size
 from vllm.platforms import current_platform
-from ....conftest import (IMAGE_ASSETS, HfRunner, PromptAudioInput,
-                          PromptImageInput, VllmRunner)
+from ....conftest import (
+    IMAGE_ASSETS,
+    HfRunner,
+    PromptAudioInput,
+    PromptImageInput,
+    VllmRunner,
+)
 from ....utils import large_gpu_test
 from ...utils import check_logprobs_close
-HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
-    "stop_sign":
-    "<|user|>\n<|image_1|>\nWhat's the content of the image?<|end|>\n<|assistant|>\n",  # noqa: E501
-    "cherry_blossom":
-    "<|user|>\n<|image_1|>\nPlease infer the season with reason in details.<|end|>\n<|assistant|>\n",  # noqa: E501
-})
-HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n"  # noqa: E501
+HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts(
+    {
+        "stop_sign": "<|user|>\n<|image_1|>\nWhat's the content of the image?<|end|>\n<|assistant|>\n",  # noqa: E501
+        "cherry_blossom": "<|user|>\n<|image_1|>\nPlease infer the season with reason in details.<|end|>\n<|assistant|>\n",  # noqa: E501
+    }
+)
+HF_MULTIIMAGE_IMAGE_PROMPT = (
+    "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n"  # noqa: E501
+)
 model_path = snapshot_download("microsoft/Phi-4-multimodal-instruct")
 # Since the vision-lora and speech-lora co-exist with the base model,
 # we have to manually specify the path of the lora weights.
 vision_lora_path = os.path.join(model_path, "vision-lora")
-speech_question = os.path.join(model_path, "examples",
-                               "what_is_shown_in_this_image.wav")
+speech_question = os.path.join(
+    model_path, "examples", "what_is_shown_in_this_image.wav"
+)
 models = [model_path]
-def vllm_to_hf_output(vllm_output: tuple[list[int], str,
-                                         Optional[SampleLogprobs]],
-                      model: str):
+def vllm_to_hf_output(
+    vllm_output: tuple[list[int], str, Optional[SampleLogprobs]], model: str
+):
     """Sanitize vllm output to be comparable with hf output."""
     _, output_str, out_logprobs = vllm_output
@@ -71,8 +79,7 @@ if current_platform.is_rocm():
 def run_test(
     hf_runner: type[HfRunner],
     vllm_runner: type[VllmRunner],
-    inputs: Sequence[tuple[list[str], PromptImageInput,
-                           Optional[PromptAudioInput]]],
+    inputs: Sequence[tuple[list[str], PromptImageInput, Optional[PromptAudioInput]]],
     model: str,
     *,
     max_model_len: int,
@@ -98,27 +105,29 @@ def run_test(
     # will hurt multiprocessing backend with fork method (the default method).
     # max_model_len should be greater than image_feature_size
     with vllm_runner(
-            model,
-            runner="generate",
-            max_model_len=max_model_len,
-            max_num_seqs=2,
-            dtype=dtype,
-            limit_mm_per_prompt={"image": mm_limit},
-            tensor_parallel_size=tensor_parallel_size,
-            distributed_executor_backend=distributed_executor_backend,
-            enable_lora=True,
-            max_lora_rank=320,
-            gpu_memory_utilization=0.8,  # set to 0.8 to avoid OOM in CI
-            enforce_eager=True,
+        model,
+        runner="generate",
+        max_model_len=max_model_len,
+        max_num_seqs=2,
+        dtype=dtype,
+        limit_mm_per_prompt={"image": mm_limit},
+        tensor_parallel_size=tensor_parallel_size,
+        distributed_executor_backend=distributed_executor_backend,
+        enable_lora=True,
+        max_lora_rank=320,
+        gpu_memory_utilization=0.8,  # set to 0.8 to avoid OOM in CI
+        enforce_eager=True,
     ) as vllm_model:
         lora_request = LoRARequest("vision", 1, vision_lora_path)
         vllm_outputs_per_case = [
-            vllm_model.generate_greedy_logprobs(prompts,
-                                                max_tokens,
-                                                num_logprobs=num_logprobs,
-                                                images=images,
-                                                audios=audios,
-                                                lora_request=lora_request)
+            vllm_model.generate_greedy_logprobs(
+                prompts,
+                max_tokens,
+                num_logprobs=num_logprobs,
+                images=images,
+                audios=audios,
+                lora_request=lora_request,
+            )
             for prompts, images, audios in inputs
         ]
@@ -127,42 +136,36 @@ def run_test(
         pytest.skip("HF impl is not compatible with current transformers")
     hf_model_kwargs = {"_attn_implementation": "sdpa"}
-    with hf_runner(model, dtype=dtype,
-                   model_kwargs=hf_model_kwargs) as hf_model:
+    with hf_runner(model, dtype=dtype, model_kwargs=hf_model_kwargs) as hf_model:
         hf_processor = hf_model.processor
         eos_token_id = hf_processor.tokenizer.eos_token_id
-        def patch_hf_processor(*args,
-                               text="",
-                               images=None,
-                               audio=None,
-                               sampling_rate=None,
-                               **kwargs):
+        def patch_hf_processor(
+            *args, text="", images=None, audio=None, sampling_rate=None, **kwargs
+        ):
             audios = None
             if audio is not None and sampling_rate is not None:
                 audios = [(audio, sampling_rate)]
-            return hf_processor(*args,
-                                text=text,
-                                images=images,
-                                audios=audios,
-                                **kwargs)
+            return hf_processor(
+                *args, text=text, images=images, audios=audios, **kwargs
+            )
         hf_model.processor = patch_hf_processor
         hf_outputs_per_case = [
-            hf_model.generate_greedy_logprobs_limit(prompts,
-                                                    max_tokens,
-                                                    num_logprobs=num_logprobs,
-                                                    images=images,
-                                                    audios=audios,
-                                                    eos_token_id=eos_token_id,
-                                                    num_logits_to_keep=0)
+            hf_model.generate_greedy_logprobs_limit(
+                prompts,
+                max_tokens,
+                num_logprobs=num_logprobs,
+                images=images,
+                audios=audios,
+                eos_token_id=eos_token_id,
+                num_logits_to_keep=0,
+            )
             for prompts, images, audios in inputs
         ]
-    for hf_outputs, vllm_outputs in zip(hf_outputs_per_case,
-                                        vllm_outputs_per_case):
+    for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_outputs,
@@ -189,16 +192,27 @@ def run_test(
@pytest.mark.parametrize("max_model_len", [12800])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype: str, max_model_len: int, max_tokens: int,
num_logprobs: int) -> None:
def test_models(
hf_runner,
vllm_runner,
image_assets,
model,
size_factors,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_image = [(
[prompt for _ in size_factors],
[rescale_image_size(image, factor) for factor in size_factors],
None,
) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]
inputs_per_image = [
(
[prompt for _ in size_factors],
[rescale_image_size(image, factor) for factor in size_factors],
None,
)
for image, prompt in zip(images, HF_IMAGE_PROMPTS)
]
run_test(
hf_runner,
@@ -233,16 +247,26 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
@pytest.mark.parametrize("max_model_len", [25600])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
size_factors, dtype: str, max_model_len: int,
max_tokens: int, num_logprobs: int) -> None:
def test_multi_images_models(
hf_runner,
vllm_runner,
image_assets,
model,
size_factors,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_case = [
(
[HF_MULTIIMAGE_IMAGE_PROMPT for _ in size_factors],
[[rescale_image_size(image, factor) for image in images]
for factor in size_factors],
[
[rescale_image_size(image, factor) for image in images]
for factor in size_factors
],
None,
),
]
@@ -266,10 +290,15 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
@pytest.mark.parametrize("max_model_len", [12800])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_vision_speech_models(hf_runner, vllm_runner, model, dtype: str,
max_model_len: int, max_tokens: int,
num_logprobs: int) -> None:
def test_vision_speech_models(
hf_runner,
vllm_runner,
model,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
# use the example speech question so that the model outputs are reasonable
audio = librosa.load(speech_question, sr=None)
image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")