Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/vllm/profiler/layerwise_profile.py
+++ b/vllm/profiler/layerwise_profile.py
@@ -12,21 +12,26 @@ from torch._C._profiler import _EventType, _ExperimentalConfig, _ProfilerEvent
 from torch.autograd.profiler import FunctionEvent
 from torch.profiler import ProfilerActivity, profile

-from vllm.profiler.utils import (TablePrinter, event_has_module,
-                                 event_is_torch_op, event_module_repr,
-                                 event_torch_op_stack_trace, indent_string)
+from vllm.profiler.utils import (
+    TablePrinter,
+    event_has_module,
+    event_is_torch_op,
+    event_module_repr,
+    event_torch_op_stack_trace,
+    indent_string,
+)


@dataclass
 class _ModuleTreeNode:
    event: _ProfilerEvent
-    parent: Optional['_ModuleTreeNode'] = None
-    children: list['_ModuleTreeNode'] = field(default_factory=list)
+    parent: Optional["_ModuleTreeNode"] = None
+    children: list["_ModuleTreeNode"] = field(default_factory=list)
    trace: str = ""

    @property
    def is_leaf(self):
-        return (self.event.children is None or len(self.event.children) == 0)
+        return self.event.children is None or len(self.event.children) == 0

    @property
    def is_torch_op(self):
@@ -34,8 +39,10 @@ class _ModuleTreeNode:

    @property
    def is_cuda(self):
-        return (self.event.tag == _EventType.Kineto
-                and self.event.typed[1].device_type == DeviceType.CUDA)
+        return (
+            self.event.tag == _EventType.Kineto
+            and self.event.typed[1].device_type == DeviceType.CUDA
+        )


@dataclass
@@ -68,8 +75,7 @@ class _StatsTreeNode:
@dataclass
 class LayerwiseProfileResults(profile):
    _kineto_results: _ProfilerResult
-    _kineto_event_correlation_map: dict[int,
-                                        list[_KinetoEvent]] = field(init=False)
+    _kineto_event_correlation_map: dict[int, list[_KinetoEvent]] = field(init=False)
    _event_correlation_map: dict[int, list[FunctionEvent]] = field(init=False)
    _module_tree: list[_ModuleTreeNode] = field(init=False)
    _model_stats_tree: list[_StatsTreeNode] = field(init=False)
@@ -84,11 +90,9 @@ class LayerwiseProfileResults(profile):
        self._build_stats_trees()

    def print_model_table(self, column_widths: dict[str, int] = None):
-        _column_widths = dict(name=60,
-                              cpu_time_us=12,
-                              cuda_time_us=12,
-                              pct_cuda_time=12,
-                              trace=60)
+        _column_widths = dict(
+            name=60, cpu_time_us=12, cuda_time_us=12, pct_cuda_time=12, trace=60
+        )
        if column_widths:
            _column_widths.update(**column_widths)
        filtered_model_table = [
@@ -99,78 +103,76 @@ class LayerwiseProfileResults(profile):
        TablePrinter(ModelStatsEntry, _column_widths).print_table(
            self._indent_row_names_based_on_depth(
                filtered_model_table,
-                indent_style=lambda indent: "|" + "-" * indent + " "))
+                indent_style=lambda indent: "|" + "-" * indent + " ",
+            )
+        )

    def print_summary_table(self, column_widths: dict[str, int] = None):
-        _column_widths = dict(name=80,
-                              cuda_time_us=12,
-                              pct_cuda_time=12,
-                              invocations=15)
+        _column_widths = dict(
+            name=80, cuda_time_us=12, pct_cuda_time=12, invocations=15
+        )
        if column_widths:
            _column_widths.update(**column_widths)
-        filtered_summary_table = [(depth, row)
-                                  for depth, row in self._flatten_stats_tree(
-                                      self._summary_stats_tree)
-                                  if row.cuda_time_us > 0]
+        filtered_summary_table = [
+            (depth, row)
+            for depth, row in self._flatten_stats_tree(self._summary_stats_tree)
+            if row.cuda_time_us > 0
+        ]
        TablePrinter(SummaryStatsEntry, _column_widths).print_table(
            self._indent_row_names_based_on_depth(
                filtered_summary_table,
-                indent_style=lambda indent: "|" + "-" * indent + " "))
+                indent_style=lambda indent: "|" + "-" * indent + " ",
+            )
+        )

    def export_model_stats_table_csv(self, filename: str):
-        df = pd.DataFrame([
-            asdict(row)
-            for _, row in self._flatten_stats_tree(self._model_stats_tree)
-        ])
+        df = pd.DataFrame(
+            [asdict(row) for _, row in self._flatten_stats_tree(self._model_stats_tree)]
+        )
        df.to_csv(filename)

    def export_summary_stats_table_csv(self, filename: str):
-        df = pd.DataFrame([
-            asdict(row)
-            for _, row in self._flatten_stats_tree(self._summary_stats_tree)
-        ])
+        df = pd.DataFrame(
+            [
+                asdict(row)
+                for _, row in self._flatten_stats_tree(self._summary_stats_tree)
+            ]
+        )
        df.to_csv(filename)

    def convert_stats_to_dict(self) -> dict[str, Any]:
        return {
-            "metadata": {
-                "num_running_seqs": self.num_running_seqs
-            },
-            "summary_stats":
-            self._convert_stats_tree_to_dict(self._summary_stats_tree),
-            "model_stats":
-            self._convert_stats_tree_to_dict(self._model_stats_tree)
+            "metadata": {"num_running_seqs": self.num_running_seqs},
+            "summary_stats": self._convert_stats_tree_to_dict(self._summary_stats_tree),
+            "model_stats": self._convert_stats_tree_to_dict(self._model_stats_tree),
        }

    @staticmethod
-    def _indent_row_names_based_on_depth(depths_rows: list[tuple[int,
-                                                                 StatsEntry]],
-                                         indent_style: Union[Callable[[int],
-                                                                      str],
-                                                             str] = " "):
+    def _indent_row_names_based_on_depth(
+        depths_rows: list[tuple[int, StatsEntry]],
+        indent_style: Union[Callable[[int], str], str] = " ",
+    ):
        indented_rows = []
        for depth, row in depths_rows:
            if row.cuda_time_us == 0:
                continue
            indented_row = copy.deepcopy(row)
-            indented_row.name = indent_string(indented_row.name, depth,
-                                              indent_style)
+            indented_row.name = indent_string(indented_row.name, depth, indent_style)
            indented_rows.append(indented_row)
        return indented_rows

    def _build_correlation_map(self):
        self._kineto_event_correlation_map = defaultdict(list)
        for event in self._kineto_results.events():
-            self._kineto_event_correlation_map[event.correlation_id()].append(
-                event)
+            self._kineto_event_correlation_map[event.correlation_id()].append(event)

    def _build_module_tree(self):
        self._module_tree = []
        event_tree = self._kineto_results.experimental_event_tree()

-        def _df_traversal(event: _ProfilerEvent,
-                          curr_node: Optional[_ModuleTreeNode] = None):
-
+        def _df_traversal(
+            event: _ProfilerEvent, curr_node: Optional[_ModuleTreeNode] = None
+        ):
            # For the tensor parallel case for now only look at task 1
            if event.start_tid != 1:
                return
@@ -183,13 +185,15 @@ class LayerwiseProfileResults(profile):
                    self._module_tree.append(node)
                curr_node = node

-            is_leaf = (event.children is None or len(event.children) == 0)
+            is_leaf = event.children is None or len(event.children) == 0
            if is_leaf and curr_node:
                node = _ModuleTreeNode(
                    event=event,
                    parent=curr_node,
                    trace=event_torch_op_stack_trace(
-                        event, until=lambda x: event_has_module(x)))
+                        event, until=lambda x: event_has_module(x)
+                    ),
+                )
                curr_node.children.append(node)
                curr_node = node

@@ -203,31 +207,31 @@ class LayerwiseProfileResults(profile):
        if node.event.tag != _EventType.Kineto:
            return None
        correlated_kineto_events = self._kineto_event_correlation_map.get(
-            node.event.correlation_id, [])
-        iterator = (x for x in correlated_kineto_events
-                    if x.device_type() == DeviceType.CUDA
-                    and x.name() == node.event.name)
+            node.event.correlation_id, []
+        )
+        iterator = (
+            x
+            for x in correlated_kineto_events
+            if x.device_type() == DeviceType.CUDA and x.name() == node.event.name
+        )
        return next(iterator, None)

    def _cumulative_cuda_time(self, node: _ModuleTreeNode):
-        'Return cuda time in microseconds'
+        "Return cuda time in microseconds"

        def _cumulative_cuda_time_recursive(node: _ModuleTreeNode):
-            if node.is_leaf and (gpu_kineto_event :=
-                                 self._get_kineto_gpu_event(node)):
+            if node.is_leaf and (gpu_kineto_event := self._get_kineto_gpu_event(node)):
                return gpu_kineto_event.duration_ns() / 1000.0
            else:
                cumulative_cuda_time = 0
                for child in node.children:
-                    cumulative_cuda_time += _cumulative_cuda_time_recursive(
-                        child)
+                    cumulative_cuda_time += _cumulative_cuda_time_recursive(child)
                return cumulative_cuda_time

        return _cumulative_cuda_time_recursive(node)

    def _total_cuda_time(self):
-        return sum(
-            [self._cumulative_cuda_time(root) for root in self._module_tree])
+        return sum([self._cumulative_cuda_time(root) for root in self._module_tree])

    def _build_stats_trees(self):
        summary_dict: dict[str, _StatsTreeNode] = {}
@@ -239,38 +243,42 @@ class LayerwiseProfileResults(profile):
        def build_summary_stats_tree_df(
            node: _ModuleTreeNode,
            parent: Optional[_StatsTreeNode] = None,
-            summary_trace: tuple[str] = ()):
-
+            summary_trace: tuple[str] = (),
+        ):
            if event_has_module(node.event):
                name = event_module_repr(node.event)
                cuda_time_us = self._cumulative_cuda_time(node)
-            elif (gpu_kineto_event := self._get_kineto_gpu_event(node)):
+            elif gpu_kineto_event := self._get_kineto_gpu_event(node):
                name = gpu_kineto_event.name()
                cuda_time_us = gpu_kineto_event.duration_ns() / 1000.0
            else:
                return None

-            summary_trace = summary_trace + (name, )
+            summary_trace = summary_trace + (name,)
            if summary_trace in summary_dict:
                entry = summary_dict[summary_trace].entry
                entry.cuda_time_us += cuda_time_us
                entry.invocations += 1
                entry.pct_cuda_time = pct_cuda_time(entry.cuda_time_us)
            else:
-                new_node = _StatsTreeNode(entry=SummaryStatsEntry(
-                    name=name,
-                    cuda_time_us=cuda_time_us,
-                    pct_cuda_time=pct_cuda_time(cuda_time_us),
-                    invocations=1),
-                                          children=[],
-                                          parent=parent)
+                new_node = _StatsTreeNode(
+                    entry=SummaryStatsEntry(
+                        name=name,
+                        cuda_time_us=cuda_time_us,
+                        pct_cuda_time=pct_cuda_time(cuda_time_us),
+                        invocations=1,
+                    ),
+                    children=[],
+                    parent=parent,
+                )
                if parent:
                    parent.children.append(new_node)
                summary_dict[summary_trace] = new_node

            for child in node.children:
-                build_summary_stats_tree_df(child, summary_dict[summary_trace],
-                                            summary_trace)
+                build_summary_stats_tree_df(
+                    child, summary_dict[summary_trace], summary_trace
+                )

            return summary_dict[summary_trace]

@@ -278,14 +286,17 @@ class LayerwiseProfileResults(profile):
        for root in self._module_tree:
            self._summary_stats_tree.append(build_summary_stats_tree_df(root))

-        def build_model_stats_tree_df(node: _ModuleTreeNode,
-                                      parent: Optional[_StatsTreeNode] = None):
-            if event_has_module(node.event, ):
+        def build_model_stats_tree_df(
+            node: _ModuleTreeNode, parent: Optional[_StatsTreeNode] = None
+        ):
+            if event_has_module(
+                node.event,
+            ):
                name = event_module_repr(node.event)
                cuda_time_us = self._cumulative_cuda_time(node)
                cpu_time_us = node.event.duration_time_ns / 1000
                trace = ""
-            elif (gpu_kineto_event := self._get_kineto_gpu_event(node)):
+            elif gpu_kineto_event := self._get_kineto_gpu_event(node):
                name = gpu_kineto_event.name()
                cuda_time_us = gpu_kineto_event.duration_ns() / 1000.0
                cpu_time_us = 0
@@ -293,14 +304,17 @@ class LayerwiseProfileResults(profile):
            else:
                return None

-            new_node = _StatsTreeNode(entry=ModelStatsEntry(
-                name=name,
-                cpu_time_us=cpu_time_us,
-                cuda_time_us=cuda_time_us,
-                pct_cuda_time=pct_cuda_time(cuda_time_us),
-                trace=trace),
-                                      parent=parent,
-                                      children=[])
+            new_node = _StatsTreeNode(
+                entry=ModelStatsEntry(
+                    name=name,
+                    cpu_time_us=cpu_time_us,
+                    cuda_time_us=cuda_time_us,
+                    pct_cuda_time=pct_cuda_time(cuda_time_us),
+                    trace=trace,
+                ),
+                parent=parent,
+                children=[],
+            )
            if parent:
                parent.children.append(new_node)

@@ -314,7 +328,8 @@ class LayerwiseProfileResults(profile):
            self._model_stats_tree.append(build_model_stats_tree_df(root))

    def _flatten_stats_tree(
-            self, tree: list[_StatsTreeNode]) -> list[tuple[int, StatsEntry]]:
+        self, tree: list[_StatsTreeNode]
+    ) -> list[tuple[int, StatsEntry]]:
        entries: list[tuple[int, StatsEntry]] = []

        def df_traversal(node: _StatsTreeNode, depth=0):
@@ -327,15 +342,11 @@ class LayerwiseProfileResults(profile):

        return entries

-    def _convert_stats_tree_to_dict(self,
-                                    tree: list[_StatsTreeNode]) -> list[dict]:
+    def _convert_stats_tree_to_dict(self, tree: list[_StatsTreeNode]) -> list[dict]:
        root_dicts: list[dict] = []

        def df_traversal(node: _StatsTreeNode, curr_json_list: list[dict]):
-            curr_json_list.append({
-                "entry": asdict(node.entry),
-                "children": []
-            })
+            curr_json_list.append({"entry": asdict(node.entry), "children": []})
            for child in node.children:
                df_traversal(child, curr_json_list[-1]["children"])

@@ -346,7 +357,6 @@ class LayerwiseProfileResults(profile):


 class layerwise_profile(profile):
-
    def __init__(self, num_running_seqs: Optional[int] = None):
        """
        layerwise profile constructor.
@@ -361,7 +371,8 @@ class layerwise_profile(profile):
            record_shapes=True,
            with_stack=True,
            with_modules=True,
-            experimental_config=_ExperimentalConfig(verbose=True))
+            experimental_config=_ExperimentalConfig(verbose=True),
+        )

        self.num_running_seqs = num_running_seqs

@@ -371,5 +382,5 @@ class layerwise_profile(profile):
    def __exit__(self, exc_type, exc_val, exc_tb):
        super().__exit__(exc_type, exc_val, exc_tb)
        self.results = LayerwiseProfileResults(
-            self.profiler.kineto_results,
-            num_running_seqs=self.num_running_seqs)
+            self.profiler.kineto_results, num_running_seqs=self.num_running_seqs
+        )
--- a/vllm/profiler/utils.py
+++ b/vllm/profiler/utils.py
@@ -30,9 +30,9 @@ def trim_string_back(string, width):


 class TablePrinter:
-
-    def __init__(self, row_cls: type[dataclasses.dataclass],
-                 column_widths: dict[str, int]):
+    def __init__(
+        self, row_cls: type[dataclasses.dataclass], column_widths: dict[str, int]
+    ):
        self.row_cls = row_cls
        self.fieldnames = [x.name for x in dataclasses.fields(row_cls)]
        self.column_widths = column_widths
@@ -46,16 +46,18 @@ class TablePrinter:

    def _print_header(self):
        for i, f in enumerate(self.fieldnames):
-            last = (i == len(self.fieldnames) - 1)
+            last = i == len(self.fieldnames) - 1
            col_width = self.column_widths[f]
-            print(trim_string_back(f, col_width).ljust(col_width),
-                  end=" | " if not last else "\n")
+            print(
+                trim_string_back(f, col_width).ljust(col_width),
+                end=" | " if not last else "\n",
+            )

    def _print_row(self, row):
        assert isinstance(row, self.row_cls)

        for i, f in enumerate(self.fieldnames):
-            last = (i == len(self.fieldnames) - 1)
+            last = i == len(self.fieldnames) - 1
            col_width = self.column_widths[f]
            val = getattr(row, f)

@@ -75,9 +77,9 @@ class TablePrinter:
        print("=" * (total_col_width + 3 * (len(self.column_widths) - 1)))


-def indent_string(string: str,
-                  indent: int,
-                  indent_style: Union[Callable[[int], str], str] = " ") -> str:
+def indent_string(
+    string: str, indent: int, indent_style: Union[Callable[[int], str], str] = " "
+) -> str:
    if indent:
        if isinstance(indent_style, str):
            return indent_style * indent + string
@@ -111,15 +113,14 @@ def event_arg_repr(arg) -> str:
    elif isinstance(arg, tuple):
        return f"({', '.join([event_arg_repr(x) for x in arg])})"
    else:
-        assert isinstance(arg,
-                          _TensorMetadata), f"Unsupported type: {type(arg)}"
-        sizes_str = ', '.join([str(x) for x in arg.sizes])
+        assert isinstance(arg, _TensorMetadata), f"Unsupported type: {type(arg)}"
+        sizes_str = ", ".join([str(x) for x in arg.sizes])
        return f"{str(arg.dtype).replace('torch.', '')}[{sizes_str}]"


 def event_torch_op_repr(event: _ProfilerEvent) -> str:
    assert event.tag == _EventType.TorchOp
-    args_str = ', '.join([event_arg_repr(x) for x in event.typed[1].inputs])
+    args_str = ", ".join([event_arg_repr(x) for x in event.typed[1].inputs])
    return f"{event.name}({args_str})".replace("aten::", "")


@@ -127,15 +128,17 @@ def event_module_repr(event: _ProfilerEvent) -> str:
    assert event_has_module(event)
    module = event.typed[1].module
    if module.parameters and len(module.parameters) > 0:
-        args_str = ', '.join(
-            [f'{x[0]}={event_arg_repr(x[1])}' for x in module.parameters])
+        args_str = ", ".join(
+            [f"{x[0]}={event_arg_repr(x[1])}" for x in module.parameters]
+        )
        return f"{module.cls_name}({args_str})"
    else:
        return module.cls_name


-def event_torch_op_stack_trace(curr_event: _ProfilerEvent,
-                               until: Callable[[_ProfilerEvent], bool]) -> str:
+def event_torch_op_stack_trace(
+    curr_event: _ProfilerEvent, until: Callable[[_ProfilerEvent], bool]
+) -> str:
    trace = ""
    curr_event = curr_event.parent
    while curr_event and not until(curr_event):