Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/v1/kv_offload/test_cpu_gpu.py
+++ b/tests/v1/kv_offload/test_cpu_gpu.py
@@ -22,7 +22,7 @@ NUM_HEADS = [8]
 NUM_LAYERS = [4]
 DTYPES = [torch.bfloat16]
 SEEDS = [0]
-CUDA_DEVICES = ['cuda:0']
+CUDA_DEVICES = ["cuda:0"]
 NUM_MAPPINGS = [3]


@@ -56,35 +56,35 @@ def test_transfer(
    current_platform.seed_everything(seed)

    # create per-layer GPU KV caches
-    attn_backends_list = [
-        FlashAttentionBackend, FlashInferBackend, FlashAttnMLABackend
-    ]
+    attn_backends_list = [FlashAttentionBackend, FlashInferBackend, FlashAttnMLABackend]

    gpu_caches = {}
    attn_backends = {}
    for i in range(num_layers):
-        layer_name = f'layer {i}'
+        layer_name = f"layer {i}"

        attn_backend = attn_backends_list[i % len(attn_backends_list)]
        attn_backends[layer_name] = attn_backend

        gpu_cache_shape = attn_backend.get_kv_cache_shape(
-            num_gpu_blocks, gpu_block_size, num_heads, head_size)
-        gpu_caches[layer_name] = torch.rand(gpu_cache_shape,
-                                            dtype=dtype,
-                                            device=device)
+            num_gpu_blocks, gpu_block_size, num_heads, head_size
+        )
+        gpu_caches[layer_name] = torch.rand(gpu_cache_shape, dtype=dtype, device=device)

    # create handler
    cpu_block_size = gpu_blocks_per_cpu_block * gpu_block_size
-    handler = CpuGpuOffloadingHandler(attn_backends=attn_backends,
-                                      gpu_block_size=gpu_block_size,
-                                      cpu_block_size=cpu_block_size,
-                                      num_cpu_blocks=num_cpu_blocks,
-                                      gpu_caches=gpu_caches)
+    handler = CpuGpuOffloadingHandler(
+        attn_backends=attn_backends,
+        gpu_block_size=gpu_block_size,
+        cpu_block_size=cpu_block_size,
+        num_cpu_blocks=num_cpu_blocks,
+        gpu_caches=gpu_caches,
+    )

    # select block mappings
-    gpu_blocks = random.sample(range(num_gpu_blocks),
-                               num_mappings * gpu_blocks_per_cpu_block)
+    gpu_blocks = random.sample(
+        range(num_gpu_blocks), num_mappings * gpu_blocks_per_cpu_block
+    )
    cpu_blocks = random.sample(range(num_cpu_blocks), num_mappings)

    # convert cpu blocks to gpu block size
@@ -96,9 +96,10 @@ def test_transfer(

    # maybe skip a GPU block to test writing to the middle of a CPU block
    if gpu_to_cpu:
-        gpu_blocks = gpu_blocks[gpu_blocks_per_cpu_block - 1:]
+        gpu_blocks = gpu_blocks[gpu_blocks_per_cpu_block - 1 :]
        cpu_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size[
-            gpu_blocks_per_cpu_block - 1:]
+            gpu_blocks_per_cpu_block - 1 :
+        ]

    # set transfer direction
    if gpu_to_cpu:
@@ -124,8 +125,9 @@ def test_transfer(

    # build dst -> src mapping
    dst_to_src = {}
-    for src_block, dst_block in zip(src_blocks_in_gpu_block_size,
-                                    dst_blocks_in_gpu_block_size):
+    for src_block, dst_block in zip(
+        src_blocks_in_gpu_block_size, dst_blocks_in_gpu_block_size
+    ):
        dst_to_src[dst_block] = src_block

    # build transfer specs
@@ -157,8 +159,11 @@ def test_transfer(
    for dst_block in range(dst_size_in_gpu_blocks):
        src_block_candidate = dst_to_src.get(dst_block)
        for src_cache, dst_cache, orig_dst_cache, kv_dim in zip(
-                src_kv_caches, dst_kv_caches, orig_dst_caches,
-                handler.kv_dim_before_num_blocks):
+            src_kv_caches,
+            dst_kv_caches,
+            orig_dst_caches,
+            handler.kv_dim_before_num_blocks,
+        ):
            if kv_dim:
                # iterate over key, value
                for i in range(2):
@@ -166,12 +171,14 @@ def test_transfer(
                        expected_value = src_cache[i][src_block_candidate]
                    else:
                        expected_value = orig_dst_cache[i][dst_block]
-                    torch.testing.assert_close(dst_cache[i][dst_block].cpu(),
-                                               expected_value.cpu())
+                    torch.testing.assert_close(
+                        dst_cache[i][dst_block].cpu(), expected_value.cpu()
+                    )
            else:
                if src_block_candidate is not None:
                    expected_value = src_cache[src_block_candidate]
                else:
                    expected_value = orig_dst_cache[dst_block]
-                torch.testing.assert_close(dst_cache[dst_block].cpu(),
-                                           expected_value.cpu())
+                torch.testing.assert_close(
+                    dst_cache[dst_block].cpu(), expected_value.cpu()
+                )
--- a/tests/v1/kv_offload/test_cpu_manager.py
+++ b/tests/v1/kv_offload/test_cpu_manager.py
@@ -7,8 +7,11 @@ from typing import Optional
 import numpy as np

 from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.abstract import (LoadStoreSpec, OffloadingEvent,
-                                         PrepareStoreOutput)
+from vllm.v1.kv_offload.abstract import (
+    LoadStoreSpec,
+    OffloadingEvent,
+    PrepareStoreOutput,
+)
 from vllm.v1.kv_offload.backends.cpu import CPUBackend
 from vllm.v1.kv_offload.lru_manager import LRUOffloadingManager
 from vllm.v1.kv_offload.mediums import CPULoadStoreSpec
@@ -26,31 +29,38 @@ def to_hashes(int_hashes: list[int]) -> list[BlockHash]:


 def verify_store_output(
-        prepare_store_output: Optional[PrepareStoreOutput],
-        expected_prepare_store_output: ExpectedPrepareStoreOutput):
+    prepare_store_output: Optional[PrepareStoreOutput],
+    expected_prepare_store_output: ExpectedPrepareStoreOutput,
+):
    assert prepare_store_output is not None
-    assert (prepare_store_output.block_hashes_to_store == to_hashes(
-        expected_prepare_store_output.block_hashes_to_store))
-    assert (prepare_store_output.block_hashes_evicted == to_hashes(
-        expected_prepare_store_output.block_hashes_evicted))
+    assert prepare_store_output.block_hashes_to_store == to_hashes(
+        expected_prepare_store_output.block_hashes_to_store
+    )
+    assert prepare_store_output.block_hashes_evicted == to_hashes(
+        expected_prepare_store_output.block_hashes_evicted
+    )
    store_spec = prepare_store_output.store_spec
    assert isinstance(store_spec, CPULoadStoreSpec)
-    expected_array = np.array(expected_prepare_store_output.store_block_ids,
-                              dtype=np.int64)
+    expected_array = np.array(
+        expected_prepare_store_output.store_block_ids, dtype=np.int64
+    )
    assert np.array_equal(expected_array, store_spec.block_ids)


-def verify_load_output(prepare_load_output: LoadStoreSpec,
-                       expected_prepare_load_output: list[int]):
+def verify_load_output(
+    prepare_load_output: LoadStoreSpec, expected_prepare_load_output: list[int]
+):
    assert isinstance(prepare_load_output, CPULoadStoreSpec)
    expected_array = np.array(expected_prepare_load_output, dtype=np.int64)
    assert np.array_equal(expected_array, prepare_load_output.block_ids)


-def verify_events(events: Iterable[OffloadingEvent],
-                  block_size: int,
-                  expected_stores: tuple[set[int], ...] = (),
-                  expected_evictions: tuple[set[int], ...] = ()):
+def verify_events(
+    events: Iterable[OffloadingEvent],
+    block_size: int,
+    expected_stores: tuple[set[int], ...] = (),
+    expected_evictions: tuple[set[int], ...] = (),
+):
    stores: list[set[BlockHash]] = []
    evictions: list[set[BlockHash]] = []
    for event in events:
@@ -61,8 +71,7 @@ def verify_events(events: Iterable[OffloadingEvent],
        else:
            stores.append(set(event.block_hashes))

-    def to_hash_sets(
-            int_sets: tuple[set[int], ...]) -> tuple[set[BlockHash], ...]:
+    def to_hash_sets(int_sets: tuple[set[int], ...]) -> tuple[set[BlockHash], ...]:
        return tuple([set(to_hashes(list(int_set))) for int_set in int_sets])

    assert tuple(evictions) == to_hash_sets(expected_evictions)
@@ -86,7 +95,8 @@ def test_cpu_manager():
            block_hashes_to_store=[1, 2],
            store_block_ids=[0, 1],
            block_hashes_evicted=[],
-        ))
+        ),
+    )

    # lookup [1, 2] -> not ready
    assert cpu_manager.lookup(to_hashes([1, 2])) == 0
@@ -96,9 +106,9 @@ def test_cpu_manager():

    # complete store [1, 2]
    cpu_manager.complete_store(to_hashes([1, 2]))
-    verify_events(cpu_manager.take_events(),
-                  block_size=block_size,
-                  expected_stores=({1, 2}, ))
+    verify_events(
+        cpu_manager.take_events(), block_size=block_size, expected_stores=({1, 2},)
+    )

    # lookup [1, 2]
    assert cpu_manager.lookup(to_hashes([1])) == 1
@@ -113,12 +123,13 @@ def test_cpu_manager():
            block_hashes_to_store=[3, 4, 5],
            store_block_ids=[2, 3, 0],
            block_hashes_evicted=[1],
-        ))
+        ),
+    )

    # verify eviction event
-    verify_events(cpu_manager.take_events(),
-                  block_size=block_size,
-                  expected_evictions=({1}, ))
+    verify_events(
+        cpu_manager.take_events(), block_size=block_size, expected_evictions=({1},)
+    )

    # prepare store with no space
    assert cpu_manager.prepare_store(to_hashes([1, 6])) is None
@@ -144,7 +155,8 @@ def test_cpu_manager():
            block_hashes_to_store=[6, 7, 8],
            store_block_ids=[3, 2, 1],
            block_hashes_evicted=[2, 3, 4],
-        ))
+        ),
+    )

    # complete store [6, 7, 8]
    cpu_manager.complete_store(to_hashes([6, 7, 8]))
@@ -160,7 +172,8 @@ def test_cpu_manager():
            block_hashes_to_store=[9],
            store_block_ids=[1],
            block_hashes_evicted=[8],
-        ))
+        ),
+    )

    # complete store [7, 9] with failure
    cpu_manager.complete_store(to_hashes([7, 9]), success=False)
@@ -169,7 +182,9 @@ def test_cpu_manager():
    assert cpu_manager.lookup(to_hashes([7])) == 1
    assert cpu_manager.lookup(to_hashes([9])) == 0

-    verify_events(cpu_manager.take_events(),
-                  block_size=block_size,
-                  expected_stores=({3, 4, 5}, {6, 7, 8}),
-                  expected_evictions=({2, 3, 4}, {8}))
+    verify_events(
+        cpu_manager.take_events(),
+        block_size=block_size,
+        expected_stores=({3, 4, 5}, {6, 7, 8}),
+        expected_evictions=({2, 3, 4}, {8}),
+    )
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -20,10 +20,7 @@ def test_cpu_offloading(cpu_block_size: int) -> None:
    kv_transfer_config = KVTransferConfig(
        kv_connector="OffloadingConnector",
        kv_role="kv_both",
-        kv_connector_extra_config={
-            "num_cpu_blocks": 100,
-            "block_size": cpu_block_size
-        },
+        kv_connector_extra_config={"num_cpu_blocks": 100, "block_size": cpu_block_size},
    )

    llm = LLM(
--- a/tests/v1/kv_offload/test_worker.py
+++ b/tests/v1/kv_offload/test_worker.py
@@ -1,17 +1,21 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from vllm.v1.kv_offload.abstract import LoadStoreSpec
-from vllm.v1.kv_offload.worker.worker import (OffloadingHandler,
-                                              OffloadingWorker, TransferResult,
-                                              TransferSpec)
+from vllm.v1.kv_offload.worker.worker import (
+    OffloadingHandler,
+    OffloadingWorker,
+    TransferResult,
+    TransferSpec,
+)


 class LoadStoreSpec1(LoadStoreSpec):
-
-    def __init__(self,
-                 submit_success: bool = True,
-                 async_success: bool = True,
-                 exception: bool = False):
+    def __init__(
+        self,
+        submit_success: bool = True,
+        async_success: bool = True,
+        exception: bool = False,
+    ):
        self.finished = False
        self.submit_success = submit_success
        self.async_success = async_success
@@ -26,7 +30,6 @@ class LoadStoreSpec1(LoadStoreSpec):


 class LoadStoreSpec2(LoadStoreSpec):
-
    @staticmethod
    def medium() -> str:
        return "2"
@@ -36,7 +39,6 @@ class LoadStoreSpec2(LoadStoreSpec):


 class OffloadingHandler1To2(OffloadingHandler):
-
    def __init__(self):
        self.transfers: dict[int, LoadStoreSpec1] = {}

@@ -63,7 +65,6 @@ class OffloadingHandler1To2(OffloadingHandler):


 class OffloadingHandler2To1(OffloadingHandler):
-
    def __init__(self):
        self.transfers: dict[int, LoadStoreSpec1] = {}

@@ -144,9 +145,9 @@ def test_offloading_worker():
    assert 7 in handler2to1.transfers

    # verify result of 3rd and 4th transfers
-    assert (sorted(worker.get_finished()) == [(3, False), (4, True)])
+    assert sorted(worker.get_finished()) == [(3, False), (4, True)]

    # complete 6th and 7th transfers
    src6.finished = True
    dst7.finished = True
-    assert (sorted(worker.get_finished()) == [(6, True), (7, True)])
+    assert sorted(worker.get_finished()) == [(6, True), (7, True)]