Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/kernels/quantization/test_machete_mm.py
+++ b/tests/kernels/quantization/test_machete_mm.py
@@ -15,15 +15,16 @@ import torch
 from tests.kernels.utils import opcheck
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.machete_utils import (
-    query_machete_supported_group_sizes)
+    query_machete_supported_group_sizes,
+)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    pack_rows, quantize_weights)
+    pack_rows,
+    quantize_weights,
+)
 from vllm.platforms import current_platform
 from vllm.scalar_type import ScalarType, scalar_types

-CUDA_DEVICES = [
-    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
-]
+CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]

 # TODO: in future PR refactor this and `is_quant_method_supported` in the kernel
 #  unit tests to a common utility function. Currently the use of
@@ -72,29 +73,38 @@ class Tensors:
 #  Ch Scales Type, Tok Scales Type)
 # NOTE: None "Scale Type" means the act type is floating point
 #       None "Output Type" means the output type is the same as the act type
-TestTypeTuple = tuple[list[torch.dtype], ScalarType, Optional[torch.dtype],
-                      Optional[torch.dtype], bool]
+TestTypeTuple = tuple[
+    list[torch.dtype], ScalarType, Optional[torch.dtype], Optional[torch.dtype], bool
+]
 TEST_TYPES = [
    # GPTQ style
-    *(TypeConfig(act_type=a_type,
-                 weight_type=w_type,
-                 output_type=None,
-                 group_scale_type=a_type,
-                 group_zero_type=None,
-                 channel_scale_type=None,
-                 token_scale_type=None)
-      for w_type in [scalar_types.uint4b8, scalar_types.uint8b128]
-      for a_type in [torch.float16, torch.bfloat16]),
+    *(
+        TypeConfig(
+            act_type=a_type,
+            weight_type=w_type,
+            output_type=None,
+            group_scale_type=a_type,
+            group_zero_type=None,
+            channel_scale_type=None,
+            token_scale_type=None,
+        )
+        for w_type in [scalar_types.uint4b8, scalar_types.uint8b128]
+        for a_type in [torch.float16, torch.bfloat16]
+    ),
    # AWQ style
-    *(TypeConfig(act_type=a_type,
-                 weight_type=w_type,
-                 output_type=None,
-                 group_scale_type=a_type,
-                 group_zero_type=a_type,
-                 channel_scale_type=None,
-                 token_scale_type=None)
-      for w_type in [scalar_types.uint4, scalar_types.uint8]
-      for a_type in [torch.float16, torch.bfloat16]),
+    *(
+        TypeConfig(
+            act_type=a_type,
+            weight_type=w_type,
+            output_type=None,
+            group_scale_type=a_type,
+            group_zero_type=a_type,
+            channel_scale_type=None,
+            token_scale_type=None,
+        )
+        for w_type in [scalar_types.uint4, scalar_types.uint8]
+        for a_type in [torch.float16, torch.bfloat16]
+    ),
    # # QQQ style
    # *(TypeConfig(act_type=torch.int8,
    #              weight_type=scalar_types.uint4b8,
@@ -133,17 +143,18 @@ def maybe_convert_zeropoints(zps: Optional[torch.Tensor], s: torch.Tensor):
    return zps if zps is None else -1 * s * (zps.to(s.dtype))


-def group_size_valid(shape: tuple[int, int, int],
-                     group_size: Optional[int]) -> bool:
+def group_size_valid(shape: tuple[int, int, int], group_size: Optional[int]) -> bool:
    return group_size is None or group_size == -1 or shape[2] % group_size == 0


-def machete_quantize_and_pack(atype: torch.dtype,
-                              w: torch.Tensor,
-                              wtype: ScalarType,
-                              stype: Optional[torch.dtype],
-                              group_size: Optional[int],
-                              zero_points: bool = False):
+def machete_quantize_and_pack(
+    atype: torch.dtype,
+    w: torch.Tensor,
+    wtype: ScalarType,
+    stype: Optional[torch.dtype],
+    group_size: Optional[int],
+    zero_points: bool = False,
+):
    assert wtype.is_integer(), "TODO: support floating point weights"

    w_ref, w_q, w_s, w_zp = quantize_weights(
@@ -152,7 +163,8 @@ def machete_quantize_and_pack(atype: torch.dtype,
        group_size=group_size,
        zero_points=zero_points,
        # to match how the kernel applies zps
-        ref_zero_points_after_scales=True)
+        ref_zero_points_after_scales=True,
+    )

    w_q = pack_rows(w_q, wtype.size_bits, *w_q.shape)
    w_q = w_q.t().contiguous().t()  # convert to col major
@@ -163,15 +175,18 @@ def machete_quantize_and_pack(atype: torch.dtype,
    return w_ref, w_q_machete, w_s, w_zp


-def create_test_tensors(shape: tuple[int, int, int],
-                        types: TypeConfig,
-                        group_size: Optional[int],
-                        subset_stride_factor: Optional[int] = None) -> Tensors:
+def create_test_tensors(
+    shape: tuple[int, int, int],
+    types: TypeConfig,
+    group_size: Optional[int],
+    subset_stride_factor: Optional[int] = None,
+) -> Tensors:
    m, n, k = shape
    factor = subset_stride_factor or 1

-    print("create_test_tensors, shape:", shape, "types:", types, "group_size:",
-          group_size)
+    print(
+        "create_test_tensors, shape:", shape, "types:", types, "group_size:", group_size
+    )

    a = rand_data((m * factor, k * factor), types.act_type, scale=3, offset=2)
    w = rand_data((k * factor, n * factor), types.act_type, scale=3, offset=1)
@@ -186,8 +201,13 @@ def create_test_tensors(shape: tuple[int, int, int],
        w = w.to(torch.float16)

    w_ref, w_q_packed, w_s, w_zp = machete_quantize_and_pack(
-        a.dtype, w, types.weight_type, types.group_scale_type, group_size,
-        types.group_zero_type is not None)
+        a.dtype,
+        w,
+        types.weight_type,
+        types.group_scale_type,
+        group_size,
+        types.group_zero_type is not None,
+    )

    if not a.dtype.is_floating_point:
        aiinfo = torch.iinfo(a.dtype)
@@ -196,35 +216,47 @@ def create_test_tensors(shape: tuple[int, int, int],
    a_ref = a.to(torch.float32)
    w_ref = w_ref.to(torch.float32)

-    w_ch_s = None if types.channel_scale_type is None else\
-        rand_data((n,), types.channel_scale_type)
-    w_tok_s = None if types.token_scale_type is None else\
-        rand_data((m,), types.token_scale_type)
+    w_ch_s = (
+        None
+        if types.channel_scale_type is None
+        else rand_data((n,), types.channel_scale_type)
+    )
+    w_tok_s = (
+        None
+        if types.token_scale_type is None
+        else rand_data((m,), types.token_scale_type)
+    )

-    return Tensors(w_ref=w_ref,
-                   a_ref=a_ref,
-                   a=a,
-                   w_q=w_q_packed,
-                   w_g_s=w_s,
-                   w_g_zp=maybe_convert_zeropoints(w_zp, w_s),
-                   w_ch_s=w_ch_s,
-                   w_tok_s=w_tok_s)
+    return Tensors(
+        w_ref=w_ref,
+        a_ref=a_ref,
+        a=a,
+        w_q=w_q_packed,
+        w_g_s=w_s,
+        w_g_zp=maybe_convert_zeropoints(w_zp, w_s),
+        w_ch_s=w_ch_s,
+        w_tok_s=w_tok_s,
+    )


 # None stype means scales use the same dtype as a
-def machete_mm_test_helper(types: TypeConfig,
-                           tensors: Tensors,
-                           group_size: Optional[int] = None,
-                           schedule: Optional[str] = None):
+def machete_mm_test_helper(
+    types: TypeConfig,
+    tensors: Tensors,
+    group_size: Optional[int] = None,
+    schedule: Optional[str] = None,
+):
    output_ref = torch.matmul(tensors.a_ref, tensors.w_ref)
    output_ref_type = output_ref.dtype

    if tensors.w_ch_s is not None:
-        output_ref = (output_ref.to(tensors.w_ch_s.dtype) *
-                      tensors.w_ch_s.unsqueeze(0)).to(output_ref_type)
+        output_ref = (
+            output_ref.to(tensors.w_ch_s.dtype) * tensors.w_ch_s.unsqueeze(0)
+        ).to(output_ref_type)
    if tensors.w_tok_s is not None:
-        output_ref = (output_ref.to(tensors.w_tok_s.dtype) *
-                      tensors.w_tok_s.unsqueeze(1)).to(output_ref_type)
+        output_ref = (
+            output_ref.to(tensors.w_tok_s.dtype) * tensors.w_tok_s.unsqueeze(1)
+        ).to(output_ref_type)

    output = ops.machete_mm(
        a=tensors.a,
@@ -245,23 +277,23 @@ def machete_mm_test_helper(types: TypeConfig,
    # Relax atol as our reduction dim becomes larger (more rounding error)
    # Relax atol when we have zeropoints since the way machete applies
    #  zeropoints (after scales) causes noise around 0
-    atol = 1 if tensors.w_g_zp is not None\
+    atol = (
+        1
+        if tensors.w_g_zp is not None
        else min(5e-2 * math.sqrt(tensors.a.shape[1]), 1)
+    )
    rtol = 1e-1 if tensors.a.element_size() >= 2 else 2e-1
-    torch.testing.assert_close(output,
-                               output_ref.to(output.dtype),
-                               rtol=rtol,
-                               atol=atol)
+    torch.testing.assert_close(
+        output, output_ref.to(output.dtype), rtol=rtol, atol=atol
+    )


-@pytest.mark.skipif(not IS_SUPPORTED_BY_GPU,
-                    reason="Machete is not supported on this GPU type.")
-@pytest.mark.parametrize("shape",
-                         MNK_SHAPES,
-                         ids=lambda x: "x".join(str(v) for v in x))
+@pytest.mark.skipif(
+    not IS_SUPPORTED_BY_GPU, reason="Machete is not supported on this GPU type."
+)
+@pytest.mark.parametrize("shape", MNK_SHAPES, ids=lambda x: "x".join(str(v) for v in x))
@pytest.mark.parametrize("types", TEST_TYPES)
 def test_machete_all_schedules(shape, types: TypeConfig):
-
    group_sizes: list[Optional[int]] = []
    if types.group_scale_type is None:
        group_sizes = [None]
@@ -275,20 +307,20 @@ def test_machete_all_schedules(shape, types: TypeConfig):
        tensors = create_test_tensors(shape, types, group_size)
        print(f"MNK = {shape}")
        for schedule in ops.machete_supported_schedules(
-                types.act_type,
-                types.weight_type,
-                group_scales_type=types.group_scale_type,
-                group_zeros_type=types.group_scale_type,
-                out_type=types.output_type):
+            types.act_type,
+            types.weight_type,
+            group_scales_type=types.group_scale_type,
+            group_zeros_type=types.group_scale_type,
+            out_type=types.output_type,
+        ):
            print(f"Testing schedule {schedule}")
            machete_mm_test_helper(types, tensors, group_size, schedule)


-@pytest.mark.skipif(not IS_SUPPORTED_BY_GPU,
-                    reason="Machete is not supported on this GPU type.")
-@pytest.mark.parametrize("shape",
-                         MNK_SHAPES,
-                         ids=lambda x: "x".join(str(v) for v in x))
+@pytest.mark.skipif(
+    not IS_SUPPORTED_BY_GPU, reason="Machete is not supported on this GPU type."
+)
+@pytest.mark.parametrize("shape", MNK_SHAPES, ids=lambda x: "x".join(str(v) for v in x))
@pytest.mark.parametrize("types", TEST_TYPES)
 def test_machete_heuristic(shape, types: TypeConfig):
    group_sizes: list[Optional[int]] = []
@@ -306,19 +338,22 @@ def test_machete_heuristic(shape, types: TypeConfig):


 # Test working on other devices
-@pytest.mark.skipif(not IS_SUPPORTED_BY_GPU,
-                    reason="Machete is not supported on this GPU type.")
+@pytest.mark.skipif(
+    not IS_SUPPORTED_BY_GPU, reason="Machete is not supported on this GPU type."
+)
@pytest.mark.parametrize("device", CUDA_DEVICES)
 def test_machete_devices(device: str):
    group_size = 128

-    type_config = TypeConfig(act_type=torch.float16,
-                             weight_type=scalar_types.uint4b8,
-                             output_type=None,
-                             group_scale_type=torch.float16,
-                             group_zero_type=None,
-                             channel_scale_type=None,
-                             token_scale_type=None)
+    type_config = TypeConfig(
+        act_type=torch.float16,
+        weight_type=scalar_types.uint4b8,
+        output_type=None,
+        group_scale_type=torch.float16,
+        group_zero_type=None,
+        channel_scale_type=None,
+        token_scale_type=None,
+    )

    tensors = create_test_tensors((512, 4096, 4096), type_config, group_size)

@@ -331,29 +366,30 @@ def test_machete_devices(device: str):


 # Test working with a subset of A and B
-@pytest.mark.skipif(not IS_SUPPORTED_BY_GPU,
-                    reason="Machete is not supported on this GPU type.")
+@pytest.mark.skipif(
+    not IS_SUPPORTED_BY_GPU, reason="Machete is not supported on this GPU type."
+)
 def test_machete_subset():
    group_size = 128

-    type_config = TypeConfig(act_type=torch.float16,
-                             weight_type=scalar_types.uint4b8,
-                             output_type=None,
-                             group_scale_type=torch.float16,
-                             group_zero_type=None,
-                             channel_scale_type=None,
-                             token_scale_type=None)
+    type_config = TypeConfig(
+        act_type=torch.float16,
+        weight_type=scalar_types.uint4b8,
+        output_type=None,
+        group_scale_type=torch.float16,
+        group_zero_type=None,
+        channel_scale_type=None,
+        token_scale_type=None,
+    )

-    tensors = create_test_tensors((512, 4096, 4096),
-                                  type_config,
-                                  group_size,
-                                  subset_stride_factor=2)
+    tensors = create_test_tensors(
+        (512, 4096, 4096), type_config, group_size, subset_stride_factor=2
+    )
    machete_mm_test_helper(type_config, tensors, group_size)


 # Test to make sure cuda graphs work
 class MacheteLayer(torch.nn.Module):
-
    def __init__(self, **kwargs):
        super().__init__()
        self.kwargs = kwargs
@@ -362,8 +398,9 @@ class MacheteLayer(torch.nn.Module):
        return ops.machete_mm(a=a, **self.kwargs)


-@pytest.mark.skipif(not IS_SUPPORTED_BY_GPU,
-                    reason="Machete is not supported on this GPU type.")
+@pytest.mark.skipif(
+    not IS_SUPPORTED_BY_GPU, reason="Machete is not supported on this GPU type."
+)
 def test_machete_cuda_graph():
    m, n, k = 512, 4096, 4096

@@ -375,7 +412,8 @@ def test_machete_cuda_graph():
    zero_points = False

    w_ref, w_q_packed, w_s, w_zp = machete_quantize_and_pack(
-        a.dtype, b, wtype, stype, group_size, zero_points)
+        a.dtype, b, wtype, stype, group_size, zero_points
+    )

    # Construct a trivial model with a single layer that calls a machete kernel
    model = MacheteLayer(