Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
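For orientation, the diff below is a purely mechanical reformat: ruff's formatter replaces yapf's paren-aligned continuation lines with a fixed four-space indent, one item per line, and a trailing comma, and ruff's isort-compatible import rules take over import sorting. A minimal before/after sketch of the style difference, assuming default ruff format behavior (the names are made up for illustration, not taken from this commit):

# Hypothetical example; identifiers are invented for illustration only.

# yapf style: continuation lines aligned under the opening parenthesis.
def build_config_yapf_style(level,
                            use_cudagraph,
                            capture_sizes):
    return dict(level=level,
                use_cudagraph=use_cudagraph,
                capture_sizes=capture_sizes)


# ruff format style: a signature that fits stays on one line; a call that is
# too long (or that already ends in a "magic" trailing comma) is split with
# one argument per line and a four-space indent.
def build_config_ruff_style(level, use_cudagraph, capture_sizes):
    return dict(
        level=level,
        use_cudagraph=use_cudagraph,
        capture_sizes=capture_sizes,
    )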
@@ -11,8 +11,13 @@ from torch import nn
 
 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.decorators import support_torch_compile
-from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode,
-                         VllmConfig, set_current_vllm_config)
+from vllm.config import (
+    CompilationConfig,
+    CompilationLevel,
+    CUDAGraphMode,
+    VllmConfig,
+    set_current_vllm_config,
+)
 from vllm.envs import VLLM_USE_V1
 from vllm.forward_context import BatchDescriptor, set_forward_context
 from vllm.utils import is_torch_equal_or_newer
@@ -23,12 +28,7 @@ from ..silly_attention import get_global_counter, reset_global_counter
 
 @support_torch_compile
 class SillyModel(nn.Module):
-
-    def __init__(self,
-                 *,
-                 vllm_config: VllmConfig,
-                 prefix: str = '',
-                 **kwargs) -> None:
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "", **kwargs) -> None:
         super().__init__()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -60,53 +60,65 @@ def _run_simple_model(
     expected_num_backend_compilations,
     expected_num_cudagraph_captured,
 ):
-    vllm_config = VllmConfig(compilation_config=CompilationConfig(
-        level=CompilationLevel.PIECEWISE,
-        use_cudagraph=True,
-        use_inductor=use_inductor,
-        splitting_ops=splitting_ops,
-        use_inductor_graph_partition=use_inductor_graph_partition,
-        cudagraph_copy_inputs=True,
-        cudagraph_capture_sizes=[1, 2],
-    ))
+    vllm_config = VllmConfig(
+        compilation_config=CompilationConfig(
+            level=CompilationLevel.PIECEWISE,
+            use_cudagraph=True,
+            use_inductor=use_inductor,
+            splitting_ops=splitting_ops,
+            use_inductor_graph_partition=use_inductor_graph_partition,
+            cudagraph_copy_inputs=True,
+            cudagraph_capture_sizes=[1, 2],
+        )
+    )
     with set_current_vllm_config(vllm_config):
-        model = SillyModel(vllm_config=vllm_config, prefix='')
+        model = SillyModel(vllm_config=vllm_config, prefix="")
 
     inputs = torch.randn(100).cuda()
 
-    with compilation_counter.expect(
+    with (
+        compilation_counter.expect(
             num_graphs_seen=1,  # one graph for the model
             num_piecewise_graphs_seen=expected_num_piecewise_graphs_seen,
-            num_piecewise_capturable_graphs_seen=
-            expected_num_piecewise_capturable_graphs_seen,
+            num_piecewise_capturable_graphs_seen=expected_num_piecewise_capturable_graphs_seen,
             num_backend_compilations=expected_num_backend_compilations,
             num_cudagraph_captured=expected_num_cudagraph_captured,
-    ), set_forward_context(None,
-                           vllm_config=vllm_config):  # background context
+        ),
+        set_forward_context(None, vllm_config=vllm_config),
+    ):  # background context
         # warm up with background context
         model(inputs)
 
         # capturing/replaying should happen under the context of cudagraph dispatching
         with set_forward_context(
-                None,
-                vllm_config=vllm_config,
-                cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
-                batch_descriptor=BatchDescriptor(num_tokens=2, )):
+            None,
+            vllm_config=vllm_config,
+            cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
+            batch_descriptor=BatchDescriptor(
+                num_tokens=2,
+            ),
+        ):
             model(torch.randn(2).cuda())
         with set_forward_context(
-                None,
-                vllm_config=vllm_config,
-                cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
-                batch_descriptor=BatchDescriptor(num_tokens=1, )):
+            None,
+            vllm_config=vllm_config,
+            cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
+            batch_descriptor=BatchDescriptor(
+                num_tokens=1,
+            ),
+        ):
             model(torch.randn(1).cuda())
 
         input = torch.zeros(2).cuda()
         reset_global_counter()
         with set_forward_context(
-                None,
-                vllm_config=vllm_config,
-                cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
-                batch_descriptor=BatchDescriptor(num_tokens=2, )):
+            None,
+            vllm_config=vllm_config,
+            cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE,
+            batch_descriptor=BatchDescriptor(
+                num_tokens=2,
+            ),
+        ):
             output = model(input)
         assert get_global_counter() == 2
         assert torch.allclose(output.cpu(), torch.tensor([19.0, 19.0]))
@@ -122,10 +134,8 @@ def test_simple_piecewise_compile(use_inductor):
         use_inductor=use_inductor,
         expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
         expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=
-        3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=
-        6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
     )
 
 
@@ -134,8 +144,7 @@ def test_simple_piecewise_compile(use_inductor):
 def test_simple_inductor_graph_partition(splitting_ops):
     assert VLLM_USE_V1
     if not is_torch_equal_or_newer("2.9.0.dev"):
-        pytest.skip("inductor graph partition is only available "
-                    "in PyTorch 2.9+")
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
 
     _run_simple_model(
         # inductor graph partition automatically resets splitting_ops
@@ -143,13 +152,9 @@ def test_simple_inductor_graph_partition(splitting_ops):
         splitting_ops=splitting_ops,
         use_inductor_graph_partition=True,
         use_inductor=True,
-        expected_num_piecewise_graphs_seen=
-        1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=
-        1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=
-        1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=
-        6,  # inductor graph partition still captures 6
+        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
+        expected_num_backend_compilations=1,  # since not splitting at fx graph level
+        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
         # graph, same as fx graph partition.
     )