Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
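For context, a migration like this typically swaps the repo's yapf and isort configuration for ruff's linter and formatter. The snippet below is a minimal pyproject.toml sketch, not the exact configuration from this commit; the table names and keys are ruff's documented options, but the selected rule set is illustrative only:

    [tool.ruff]
    # black/ruff default line length; yapf was previously set to 80
    line-length = 88

    [tool.ruff.lint]
    # "I" enables ruff's isort-compatible import sorting,
    # replacing the standalone isort tool (and its on/off comments)
    select = ["E", "F", "I"]

    [tool.ruff.format]
    # ruff's formatter is black-compatible: double quotes and
    # magic trailing commas, as seen throughout this diff
    quote-style = "double"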
@@ -21,24 +21,41 @@ from transformers import AutoTokenizer
 from vllm_test_utils.monitor import monitor

 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
-from vllm.transformers_utils.detokenizer_utils import (
-    convert_ids_list_to_tokens)
+from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens

-# isort: off
 from vllm.utils import (
-    CacheInfo, FlexibleArgumentParser, LRUCache, MemorySnapshot,
-    PlaceholderModule, bind_kv_cache, common_broadcastable_dtype,
-    current_stream, deprecate_kwargs, get_open_port, get_tcp_uri,
-    is_lossless_cast, join_host_port, make_zmq_path, make_zmq_socket,
-    memory_profiling, merge_async_iterators, sha256, split_host_port,
-    split_zmq_path, supports_kw, swap_dict_values, unique_filepath)
+    CacheInfo,
+    FlexibleArgumentParser,
+    LRUCache,
+    MemorySnapshot,
+    PlaceholderModule,
+    bind_kv_cache,
+    common_broadcastable_dtype,
+    current_stream,
+    deprecate_kwargs,
+    get_open_port,
+    get_tcp_uri,
+    is_lossless_cast,
+    join_host_port,
+    make_zmq_path,
+    make_zmq_socket,
+    memory_profiling,
+    merge_async_iterators,
+    sha256,
+    split_host_port,
+    split_zmq_path,
+    supports_kw,
+    swap_dict_values,
+    unique_filepath,
+)

-# isort: on
 from ..utils import create_new_process_for_each_test, error_on_warning


 @pytest.mark.asyncio
 async def test_merge_async_iterators():
-
     async def mock_async_iterator(idx: int):
         try:
             while True:
@@ -72,7 +89,6 @@ async def test_merge_async_iterators():


 def test_deprecate_kwargs_always():
-
     @deprecate_kwargs("old_arg", is_deprecated=True)
     def dummy(*, old_arg: object = None, new_arg: object = None):
         pass
@@ -85,7 +101,6 @@ def test_deprecate_kwargs_always():


 def test_deprecate_kwargs_never():
-
     @deprecate_kwargs("old_arg", is_deprecated=False)
     def dummy(*, old_arg: object = None, new_arg: object = None):
         pass
@@ -120,7 +135,6 @@ def test_deprecate_kwargs_dynamic():


 def test_deprecate_kwargs_additional_message():
-
     @deprecate_kwargs("old_arg", is_deprecated=True, additional_message="abcd")
     def dummy(*, old_arg: object = None, new_arg: object = None):
         pass
@@ -145,99 +159,107 @@ def test_get_open_port(monkeypatch: pytest.MonkeyPatch):
 @pytest.fixture
 def parser():
     parser = FlexibleArgumentParser()
-    parser.add_argument('--image-input-type',
-                        choices=['pixel_values', 'image_features'])
-    parser.add_argument('--model-name')
-    parser.add_argument('--batch-size', type=int)
-    parser.add_argument('--enable-feature', action='store_true')
-    parser.add_argument('--hf-overrides', type=json.loads)
-    parser.add_argument('-O', '--compilation-config', type=json.loads)
+    parser.add_argument(
+        "--image-input-type", choices=["pixel_values", "image_features"]
+    )
+    parser.add_argument("--model-name")
+    parser.add_argument("--batch-size", type=int)
+    parser.add_argument("--enable-feature", action="store_true")
+    parser.add_argument("--hf-overrides", type=json.loads)
+    parser.add_argument("-O", "--compilation-config", type=json.loads)
     return parser


 @pytest.fixture
 def parser_with_config():
     parser = FlexibleArgumentParser()
-    parser.add_argument('serve')
-    parser.add_argument('model_tag', nargs='?')
-    parser.add_argument('--model', type=str)
-    parser.add_argument('--served-model-name', type=str)
-    parser.add_argument('--config', type=str)
-    parser.add_argument('--port', type=int)
-    parser.add_argument('--tensor-parallel-size', type=int)
-    parser.add_argument('--trust-remote-code', action='store_true')
+    parser.add_argument("serve")
+    parser.add_argument("model_tag", nargs="?")
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--served-model-name", type=str)
+    parser.add_argument("--config", type=str)
+    parser.add_argument("--port", type=int)
+    parser.add_argument("--tensor-parallel-size", type=int)
+    parser.add_argument("--trust-remote-code", action="store_true")
     return parser


 def test_underscore_to_dash(parser):
-    args = parser.parse_args(['--image_input_type', 'pixel_values'])
-    assert args.image_input_type == 'pixel_values'
+    args = parser.parse_args(["--image_input_type", "pixel_values"])
+    assert args.image_input_type == "pixel_values"


 def test_mixed_usage(parser):
-    args = parser.parse_args([
-        '--image_input_type', 'image_features', '--model-name',
-        'facebook/opt-125m'
-    ])
-    assert args.image_input_type == 'image_features'
-    assert args.model_name == 'facebook/opt-125m'
+    args = parser.parse_args(
+        ["--image_input_type", "image_features", "--model-name", "facebook/opt-125m"]
+    )
+    assert args.image_input_type == "image_features"
+    assert args.model_name == "facebook/opt-125m"


 def test_with_equals_sign(parser):
     args = parser.parse_args(
-        ['--image_input_type=pixel_values', '--model-name=facebook/opt-125m'])
-    assert args.image_input_type == 'pixel_values'
-    assert args.model_name == 'facebook/opt-125m'
+        ["--image_input_type=pixel_values", "--model-name=facebook/opt-125m"]
+    )
+    assert args.image_input_type == "pixel_values"
+    assert args.model_name == "facebook/opt-125m"


 def test_with_int_value(parser):
-    args = parser.parse_args(['--batch_size', '32'])
+    args = parser.parse_args(["--batch_size", "32"])
     assert args.batch_size == 32
-    args = parser.parse_args(['--batch-size', '32'])
+    args = parser.parse_args(["--batch-size", "32"])
     assert args.batch_size == 32


 def test_with_bool_flag(parser):
-    args = parser.parse_args(['--enable_feature'])
+    args = parser.parse_args(["--enable_feature"])
     assert args.enable_feature is True
-    args = parser.parse_args(['--enable-feature'])
+    args = parser.parse_args(["--enable-feature"])
     assert args.enable_feature is True


 def test_invalid_choice(parser):
     with pytest.raises(SystemExit):
-        parser.parse_args(['--image_input_type', 'invalid_choice'])
+        parser.parse_args(["--image_input_type", "invalid_choice"])


 def test_missing_required_argument(parser):
-    parser.add_argument('--required-arg', required=True)
+    parser.add_argument("--required-arg", required=True)
     with pytest.raises(SystemExit):
         parser.parse_args([])


 def test_cli_override_to_config(parser_with_config, cli_config_file):
-    args = parser_with_config.parse_args([
-        'serve', 'mymodel', '--config', cli_config_file,
-        '--tensor-parallel-size', '3'
-    ])
+    args = parser_with_config.parse_args(
+        ["serve", "mymodel", "--config", cli_config_file, "--tensor-parallel-size", "3"]
+    )
     assert args.tensor_parallel_size == 3
-    args = parser_with_config.parse_args([
-        'serve', 'mymodel', '--tensor-parallel-size', '3', '--config',
-        cli_config_file
-    ])
+    args = parser_with_config.parse_args(
+        ["serve", "mymodel", "--tensor-parallel-size", "3", "--config", cli_config_file]
+    )
     assert args.tensor_parallel_size == 3
     assert args.port == 12312
-    args = parser_with_config.parse_args([
-        'serve', 'mymodel', '--tensor-parallel-size', '3', '--config',
-        cli_config_file, '--port', '666'
-    ])
+    args = parser_with_config.parse_args(
+        [
+            "serve",
+            "mymodel",
+            "--tensor-parallel-size",
+            "3",
+            "--config",
+            cli_config_file,
+            "--port",
+            "666",
+        ]
+    )
     assert args.tensor_parallel_size == 3
     assert args.port == 666


 def test_config_args(parser_with_config, cli_config_file):
     args = parser_with_config.parse_args(
-        ['serve', 'mymodel', '--config', cli_config_file])
+        ["serve", "mymodel", "--config", cli_config_file]
+    )
     assert args.tensor_parallel_size == 2
     assert args.trust_remote_code
@@ -245,22 +267,31 @@ def test_config_args(parser_with_config, cli_config_file):
 def test_config_file(parser_with_config):
     with pytest.raises(FileNotFoundError):
         parser_with_config.parse_args(
-            ['serve', 'mymodel', '--config', 'test_config.yml'])
+            ["serve", "mymodel", "--config", "test_config.yml"]
+        )

     with pytest.raises(ValueError):
         parser_with_config.parse_args(
-            ['serve', 'mymodel', '--config', './data/test_config.json'])
+            ["serve", "mymodel", "--config", "./data/test_config.json"]
+        )

     with pytest.raises(ValueError):
-        parser_with_config.parse_args([
-            'serve', 'mymodel', '--tensor-parallel-size', '3', '--config',
-            '--batch-size', '32'
-        ])
+        parser_with_config.parse_args(
+            [
+                "serve",
+                "mymodel",
+                "--tensor-parallel-size",
+                "3",
+                "--config",
+                "--batch-size",
+                "32",
+            ]
+        )


 def test_no_model_tag(parser_with_config, cli_config_file):
     with pytest.raises(ValueError):
-        parser_with_config.parse_args(['serve', '--config', cli_config_file])
+        parser_with_config.parse_args(["serve", "--config", cli_config_file])


 def test_dict_args(parser):
@@ -323,7 +354,7 @@ def test_dict_args(parser):
             },
             "key14": {
                 "key15": "-minus.and.dot",
-            }
+            },
         },
     }
     assert parsed_args.compilation_config == {
         "level": 1,
@@ -375,24 +406,29 @@ def test_duplicate_dict_args(caplog_vllm, parser):
         (lambda foo, **kwargs: None, "something_else", False, True, True),
         (lambda foo, **kwargs: None, "kwargs", True, True, False),
         (lambda foo, **kwargs: None, "foo", True, True, False),
-    ])
+    ],
+)
-# yapf: disable
-def test_supports_kw(callable,kw_name,requires_kw_only,
-                     allow_var_kwargs,is_supported):
-    assert supports_kw(
+def test_supports_kw(
+    callable, kw_name, requires_kw_only, allow_var_kwargs, is_supported
+):
+    assert (
+        supports_kw(
             callable=callable,
             kw_name=kw_name,
             requires_kw_only=requires_kw_only,
-        allow_var_kwargs=allow_var_kwargs
-    ) == is_supported
+            allow_var_kwargs=allow_var_kwargs,
+        )
+        == is_supported
+    )


 @create_new_process_for_each_test()
 def test_memory_profiling():
     # Fake out some model loading + inference memory usage to test profiling
     # Memory used by other processes will show up as cuda usage outside of torch
-    from vllm.distributed.device_communicators.cuda_wrapper import (
-        CudaRTLibrary)
+    from vllm.distributed.device_communicators.cuda_wrapper import CudaRTLibrary

     lib = CudaRTLibrary()
     # 512 MiB allocation outside of this instance
     handle1 = lib.cudaMalloc(512 * 1024 * 1024)
@@ -401,9 +437,9 @@ def test_memory_profiling():

     # load weights

-    weights = torch.randn(128, 1024, 1024, device='cuda', dtype=torch.float32)
+    weights = torch.randn(128, 1024, 1024, device="cuda", dtype=torch.float32)

-    weights_memory = 128 * 1024 * 1024 * 4 # 512 MiB
+    weights_memory = 128 * 1024 * 1024 * 4  # 512 MiB

     def measure_current_non_torch():
         free, total = torch.cuda.mem_get_info()
@@ -412,11 +448,14 @@ def test_memory_profiling():
         current_non_torch = current_used - current_torch
         return current_non_torch

-    with memory_profiling(baseline_snapshot=baseline_snapshot,
-                          weights_memory=weights_memory) as result, \
-          monitor(measure_current_non_torch) as monitored_values:
+    with (
+        memory_profiling(
+            baseline_snapshot=baseline_snapshot, weights_memory=weights_memory
+        ) as result,
+        monitor(measure_current_non_torch) as monitored_values,
+    ):
         # make a memory spike, 1 GiB
-        spike = torch.randn(256, 1024, 1024, device='cuda', dtype=torch.float32)
+        spike = torch.randn(256, 1024, 1024, device="cuda", dtype=torch.float32)
         del spike

         # Add some extra non-torch memory 256 MiB (simulate NCCL)
@@ -431,7 +470,7 @@ def test_memory_profiling():
     # 5% tolerance is caused by cuda runtime.
     # we cannot control cuda runtime in the granularity of bytes,
     # which causes a small error (<10 MiB in practice)
-    non_torch_ratio = result.non_torch_increase / (256 * 1024 * 1024) # noqa
+    non_torch_ratio = result.non_torch_increase / (256 * 1024 * 1024)  # noqa
     assert abs(non_torch_ratio - 1) <= 0.05
     assert result.torch_peak_increase == 1024 * 1024 * 1024
     del weights
@@ -443,87 +482,84 @@ def test_bind_kv_cache():
     from vllm.attention import Attention

     ctx = {
-        'layers.0.self_attn': Attention(32, 128, 0.1),
-        'layers.1.self_attn': Attention(32, 128, 0.1),
-        'layers.2.self_attn': Attention(32, 128, 0.1),
-        'layers.3.self_attn': Attention(32, 128, 0.1),
+        "layers.0.self_attn": Attention(32, 128, 0.1),
+        "layers.1.self_attn": Attention(32, 128, 0.1),
+        "layers.2.self_attn": Attention(32, 128, 0.1),
+        "layers.3.self_attn": Attention(32, 128, 0.1),
     }
     kv_cache = [
-        torch.zeros((1, )),
-        torch.zeros((1, )),
-        torch.zeros((1, )),
-        torch.zeros((1, )),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
     ]
     bind_kv_cache(ctx, [kv_cache])
-    assert ctx['layers.0.self_attn'].kv_cache[0] is kv_cache[0]
-    assert ctx['layers.1.self_attn'].kv_cache[0] is kv_cache[1]
-    assert ctx['layers.2.self_attn'].kv_cache[0] is kv_cache[2]
-    assert ctx['layers.3.self_attn'].kv_cache[0] is kv_cache[3]
+    assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache[0]
+    assert ctx["layers.1.self_attn"].kv_cache[0] is kv_cache[1]
+    assert ctx["layers.2.self_attn"].kv_cache[0] is kv_cache[2]
+    assert ctx["layers.3.self_attn"].kv_cache[0] is kv_cache[3]


 def test_bind_kv_cache_kv_sharing():
     from vllm.attention import Attention

     ctx = {
-        'layers.0.self_attn': Attention(32, 128, 0.1),
-        'layers.1.self_attn': Attention(32, 128, 0.1),
-        'layers.2.self_attn': Attention(32, 128, 0.1),
-        'layers.3.self_attn': Attention(32, 128, 0.1),
+        "layers.0.self_attn": Attention(32, 128, 0.1),
+        "layers.1.self_attn": Attention(32, 128, 0.1),
+        "layers.2.self_attn": Attention(32, 128, 0.1),
+        "layers.3.self_attn": Attention(32, 128, 0.1),
     }
     kv_cache = [
-        torch.zeros((1, )),
-        torch.zeros((1, )),
-        torch.zeros((1, )),
-        torch.zeros((1, )),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
     ]
     shared_kv_cache_layers = {
-        'layers.2.self_attn': 'layers.1.self_attn',
-        'layers.3.self_attn': 'layers.0.self_attn'
+        "layers.2.self_attn": "layers.1.self_attn",
+        "layers.3.self_attn": "layers.0.self_attn",
     }
     bind_kv_cache(ctx, [kv_cache], shared_kv_cache_layers)
-    assert ctx['layers.0.self_attn'].kv_cache[0] is kv_cache[0]
-    assert ctx['layers.1.self_attn'].kv_cache[0] is kv_cache[1]
-    assert ctx['layers.2.self_attn'].kv_cache[0] is kv_cache[1]
-    assert ctx['layers.3.self_attn'].kv_cache[0] is kv_cache[0]
+    assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache[0]
+    assert ctx["layers.1.self_attn"].kv_cache[0] is kv_cache[1]
+    assert ctx["layers.2.self_attn"].kv_cache[0] is kv_cache[1]
+    assert ctx["layers.3.self_attn"].kv_cache[0] is kv_cache[0]


 def test_bind_kv_cache_non_attention():
     from vllm.attention import Attention

     # example from Jamba PP=2
     ctx = {
-        'model.layers.20.attn': Attention(32, 128, 0.1),
-        'model.layers.28.attn': Attention(32, 128, 0.1),
+        "model.layers.20.attn": Attention(32, 128, 0.1),
+        "model.layers.28.attn": Attention(32, 128, 0.1),
     }
     kv_cache = [
-        torch.zeros((1, )),
-        torch.zeros((1, )),
+        torch.zeros((1,)),
+        torch.zeros((1,)),
     ]
     bind_kv_cache(ctx, [kv_cache])
-    assert ctx['model.layers.20.attn'].kv_cache[0] is kv_cache[0]
-    assert ctx['model.layers.28.attn'].kv_cache[0] is kv_cache[1]
+    assert ctx["model.layers.20.attn"].kv_cache[0] is kv_cache[0]
+    assert ctx["model.layers.28.attn"].kv_cache[0] is kv_cache[1]


 def test_bind_kv_cache_pp():
     with patch("vllm.utils.cuda_device_count_stateless", lambda: 2):
         # this test runs with 1 GPU, but we simulate 2 GPUs
-        cfg = VllmConfig(
-            parallel_config=ParallelConfig(pipeline_parallel_size=2))
+        cfg = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=2))
     with set_current_vllm_config(cfg):
         from vllm.attention import Attention

         ctx = {
-            'layers.0.self_attn': Attention(32, 128, 0.1),
+            "layers.0.self_attn": Attention(32, 128, 0.1),
         }
-        kv_cache = [
-            [torch.zeros((1, ))],
-            [torch.zeros((1, ))]
-        ]
+        kv_cache = [[torch.zeros((1,))], [torch.zeros((1,))]]
         bind_kv_cache(ctx, kv_cache)
-        assert ctx['layers.0.self_attn'].kv_cache[0] is kv_cache[0][0]
-        assert ctx['layers.0.self_attn'].kv_cache[1] is kv_cache[1][0]
+        assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache[0][0]
+        assert ctx["layers.0.self_attn"].kv_cache[1] is kv_cache[1][0]


 class TestLRUCache(LRUCache):
-
     def _on_remove(self, key, value):
         if not hasattr(self, "_remove_counter"):
             self._remove_counter = 0