Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tools/profiler/nsys_profile_tools/gputrc2graph.py
+++ b/tools/profiler/nsys_profile_tools/gputrc2graph.py
@@ -1,10 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-    This generates gpu kernel analysis output from nsys rep. Will call nsys
-    stats  -r cuda_gpu_kern_trace, get non-overlapped gpu cycles, then generate
-    csv and html output for analysis
+This generates gpu kernel analysis output from nsys rep. Will call nsys
+stats  -r cuda_gpu_kern_trace, get non-overlapped gpu cycles, then generate
+csv and html output for analysis
 """
+
 import argparse
 import logging
 import os
@@ -16,13 +17,13 @@ logger = logging.getLogger(__name__)

 # helper data class for annotating kernels
 def load_engine_model():
-    """ returns engine_model built from all json files in the current dir """
+    """returns engine_model built from all json files in the current dir"""
    import glob
    import json
+
    engine_model = {}

-    json_files = glob.glob(
-        os.path.join(os.path.dirname(__file__) or ".", "*.json"))
+    json_files = glob.glob(os.path.join(os.path.dirname(__file__) or ".", "*.json"))
    for fname in json_files:
        with open(fname, encoding="utf-8") as f:
            engine_model.update(json.load(f))
@@ -30,54 +31,54 @@ def load_engine_model():


 class GPUTrace2Graph:
-    """ 
-        Parses output of nsys report, generates csv and bar chart output
+    """
+    Parses output of nsys report, generates csv and bar chart output
    """

    def __init__(self):
        import pandas as pd  # avoid importing till needed
+
        self.pd = pd
        self.pd.options.mode.copy_on_write = True

    # helper functions for generating trace->summary csvs
    def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file):
-        logger.info('loading %s', in_file)
+        logger.info("loading %s", in_file)
        df = self.pd.read_csv(
-            in_file,
-            usecols=['Start (ns)', 'Duration (ns)', 'Device', 'Strm', 'Name'])
-        df['End (ns)'] = df['Start (ns)'] + df['Duration (ns)']
+            in_file, usecols=["Start (ns)", "Duration (ns)", "Device", "Strm", "Name"]
+        )
+        df["End (ns)"] = df["Start (ns)"] + df["Duration (ns)"]
        df = self.sum_non_overlapping_intervals(df)
        # get ready to print table with elapsed times per kernel
-        df['Instances'] = 1
-        df_sum = df.groupby('Name', as_index=False).agg({
-            'Elapsed Time (ns)': 'sum',
-            'Duration (ns)': 'sum',
-            'Instances': 'size'
-        })
+        df["Instances"] = 1
+        df_sum = df.groupby("Name", as_index=False).agg(
+            {"Elapsed Time (ns)": "sum", "Duration (ns)": "sum", "Instances": "size"}
+        )

        # generate csv
-        df_sum['Total Time (sec)'] = df_sum['Duration (ns)'] / 1e9
-        df_sum['Elapsed Time (sec)'] = df_sum['Elapsed Time (ns)'] / 1e9
-        df_sum = df_sum.sort_values(by='Elapsed Time (sec)', ascending=False)
-        df_sum[['Elapsed Time (sec)', 'Total Time (sec)', 'Instances',
-                'Name']].to_csv(out_file, index=False)
+        df_sum["Total Time (sec)"] = df_sum["Duration (ns)"] / 1e9
+        df_sum["Elapsed Time (sec)"] = df_sum["Elapsed Time (ns)"] / 1e9
+        df_sum = df_sum.sort_values(by="Elapsed Time (sec)", ascending=False)
+        df_sum[["Elapsed Time (sec)", "Total Time (sec)", "Instances", "Name"]].to_csv(
+            out_file, index=False
+        )

    def sum_non_overlapping_intervals(self, df):
-        """ 
-            returns new sorted df with Elapsed Time (ns) column using 
-            vectorized operations 
+        """
+        returns new sorted df with Elapsed Time (ns) column using
+        vectorized operations
        """
        logger.info("sorting %s trace records by start time", str(df.shape))

        # Sort by start time and reset index
-        df = df.sort_values(by='Start (ns)').reset_index(drop=True)
+        df = df.sort_values(by="Start (ns)").reset_index(drop=True)

        # Initialize elapsed time as duration
-        df['Elapsed Time (ns)'] = df['Duration (ns)']
+        df["Elapsed Time (ns)"] = df["Duration (ns)"]

        # Get numpy arrays for faster operations
-        starts = df['Start (ns)'].values
-        ends = df['End (ns)'].values
+        starts = df["Start (ns)"].values
+        ends = df["End (ns)"].values

        # Keep track of current interval end
        current_end = ends[0]
@@ -85,16 +86,17 @@ class GPUTrace2Graph:
        # Update current_end for overlapping intervals
        for i in range(1, len(df)):
            if i % display_units == 0:
-                print(f'processing trace: {int(i/len(df) * 100)} %', end="\r")
+                print(f"processing trace: {int(i / len(df) * 100)} %", end="\r")
            if starts[i] <= current_end:
                if ends[i] > current_end:
                    # Partial overlap
-                    df.iloc[i, df.columns.get_loc('Elapsed Time (ns)'
-                                                  )] = ends[i] - current_end
+                    df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = (
+                        ends[i] - current_end
+                    )
                    current_end = ends[i]
                else:
                    # Complete overlap
-                    df.iloc[i, df.columns.get_loc('Elapsed Time (ns)')] = 0
+                    df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = 0
            else:
                # No overlap
                current_end = ends[i]
@@ -103,147 +105,167 @@ class GPUTrace2Graph:

    # functions for generating html files
    def make_html(self, df, output_dir, title):
-        """ make html graph from df """
+        """make html graph from df"""
        import plotly.express as px
+
        if df.empty:
            return
-        output_name = output_dir + '/result'
+        output_name = output_dir + "/result"
        if not title:
-            title = 'Model_Engine'
-        x = 'Model_Engine'
-        y = 'Elapsed Time (sec)'
-        color = 'Category'
+            title = "Model_Engine"
+        x = "Model_Engine"
+        y = "Elapsed Time (sec)"
+        color = "Category"
        """ generate kernel mapping table  """
        # Sort Model_Engine categories by last field after underscore
-        df['Model_Engine'] = self.pd.Categorical(
-            df['Model_Engine'],
-            sorted(df['Model_Engine'].unique(),
-                   key=lambda x: x.split('_')[-1]))
-        df[['Model_Engine', color, 'Instances', 'Name',
-            y]].sort_values(by=color).to_csv(f'{output_name}.csv', index=False)
-        graph = px.histogram(df.round(2),
-                             x=x,
-                             y=y,
-                             title=(f'{y} for {title}'),
-                             color=color,
-                             text_auto=True)
+        df["Model_Engine"] = self.pd.Categorical(
+            df["Model_Engine"],
+            sorted(df["Model_Engine"].unique(), key=lambda x: x.split("_")[-1]),
+        )
+        df[["Model_Engine", color, "Instances", "Name", y]].sort_values(
+            by=color
+        ).to_csv(f"{output_name}.csv", index=False)
+        graph = px.histogram(
+            df.round(2),
+            x=x,
+            y=y,
+            title=(f"{y} for {title}"),
+            color=color,
+            text_auto=True,
+        )
        # wrap x axis labels
        graph.update_xaxes(automargin=True)
-        graph.write_html(f'{output_name}.html')
+        graph.write_html(f"{output_name}.html")
        """
            Generate data table with columns per Model_Engine into result.html
        """
-        pivot_df = df.pivot_table(values='Elapsed Time (sec)',
-                                  index='Category',
-                                  columns='Model_Engine',
-                                  aggfunc='sum',
-                                  observed=False).round(2)
+        pivot_df = df.pivot_table(
+            values="Elapsed Time (sec)",
+            index="Category",
+            columns="Model_Engine",
+            aggfunc="sum",
+            observed=False,
+        ).round(2)
        # Add sum row at bottom
-        pivot_df.loc['total_elapsed_sec'] = pivot_df.sum()
-        pivot_df.fillna('').to_html('temp.html')
-        with (open(f'{output_name}.html', 'a', encoding='utf-8') as
-              outfile, open('temp.html', encoding='utf-8') as infile):
+        pivot_df.loc["total_elapsed_sec"] = pivot_df.sum()
+        pivot_df.fillna("").to_html("temp.html")
+        with (
+            open(f"{output_name}.html", "a", encoding="utf-8") as outfile,
+            open("temp.html", encoding="utf-8") as infile,
+        ):
            outfile.write(infile.read())
-        os.remove('temp.html')
+        os.remove("temp.html")

-        print(f'Finished generating: \n'
-              f' {output_name}.html for stack bar chart \n'
-              f' {output_name}.csv for Kernel-Category mapping')
+        print(
+            f"Finished generating: \n"
+            f" {output_name}.html for stack bar chart \n"
+            f" {output_name}.csv for Kernel-Category mapping"
+        )

    def anno_gpu_kernname(self, df, mapping):
-        """ add "Category" column """
+        """add "Category" column"""

        def anno_gpu_kernname_helper(name):
            for kern_name, val in mapping.items():
                if re.search(kern_name, name):
                    return val

-        df['Category'] = df['Name'].apply(anno_gpu_kernname_helper)
+        df["Category"] = df["Name"].apply(anno_gpu_kernname_helper)

    def make_nongpu_row(self, df, nongpu_sec):
-        """ this will append non-gpu time entry at end of df """
+        """this will append non-gpu time entry at end of df"""
        nongpu_row = self.pd.DataFrame([df.iloc[-1]])
-        nongpu_row['Category'] = nongpu_row['Name'] = 'CPU(non-GPU)'
-        nongpu_row['Instances'] = 1
-        nongpu_row['Elapsed Time (sec)'] = nongpu_sec
-        return (nongpu_row)
+        nongpu_row["Category"] = nongpu_row["Name"] = "CPU(non-GPU)"
+        nongpu_row["Instances"] = 1
+        nongpu_row["Elapsed Time (sec)"] = nongpu_sec
+        return nongpu_row

    def is_valid_file(self, base_file):
-        """ asserts if base_file is non-existent or is empty """
-        assert os.path.isfile(base_file) and os.path.getsize(base_file) > 0, \
-           f"{base_file} doesn't exist or is empty"
+        """asserts if base_file is non-existent or is empty"""
+        assert os.path.isfile(base_file) and os.path.getsize(base_file) > 0, (
+            f"{base_file} doesn't exist or is empty"
+        )

    def should_gen_file(self, new_file, base_file):
-        """ figure out if new file should be generated from base_file """
+        """figure out if new file should be generated from base_file"""
        self.is_valid_file(base_file)
-        if (os.path.exists(new_file)
-                and (os.path.getmtime(new_file) > os.path.getmtime(base_file))
-                and (os.path.getsize(base_file) > 0)):
-            logger.info('reusing %s', new_file)
+        if (
+            os.path.exists(new_file)
+            and (os.path.getmtime(new_file) > os.path.getmtime(base_file))
+            and (os.path.getsize(base_file) > 0)
+        ):
+            logger.info("reusing %s", new_file)
            return False
        else:
-            logger.info('generating %s', new_file)
+            logger.info("generating %s", new_file)
            return True

    def gen_sum_file(self, file, nsys_cmd):
-        """ 
-            generates sum file from nsys trace with times per kernel and
-            returns the name of the sum file
+        """
+        generates sum file from nsys trace with times per kernel and
+        returns the name of the sum file
        """
        import subprocess
+
        file_dir = os.path.dirname(file)
        file_name = os.path.basename(file)

        if not file_dir:
-            file_dir = '.'
+            file_dir = "."
        # Walk through trace and get the total non-overlapped time
-        nsys_stats_file = f'{file_dir}/{file_name}_cuda_gpu_trace.csv'
-        sum_file = f'{file_dir}/{file_name}_cuda_gpu_kernel_tracesum.csv'
+        nsys_stats_file = f"{file_dir}/{file_name}_cuda_gpu_trace.csv"
+        sum_file = f"{file_dir}/{file_name}_cuda_gpu_kernel_tracesum.csv"
        if self.should_gen_file(nsys_stats_file, file):
            cmd = [
-                nsys_cmd, 'stats', '-r', 'cuda_gpu_trace', file, '-o',
-                f'{file_dir}/{file_name}'
+                nsys_cmd,
+                "stats",
+                "-r",
+                "cuda_gpu_trace",
+                file,
+                "-o",
+                f"{file_dir}/{file_name}",
            ]
-            cmd_str = ' '.join(cmd)
-            logger.info('+ %s', cmd_str)
+            cmd_str = " ".join(cmd)
+            logger.info("+ %s", cmd_str)
            # estimate time based on calibrated 240M/min
            file_size_mb = os.path.getsize(file) / 1e6
            logger.info(
-                'nsys stats for %.2f MB file expected to take %.2f min',
-                file_size_mb, file_size_mb / 240)
+                "nsys stats for %.2f MB file expected to take %.2f min",
+                file_size_mb,
+                file_size_mb / 240,
+            )
            try:
                subprocess.run(cmd, check=True)
            except Exception:
-                logger.error("%s failed; Use --nsys_cmd to specify nsys path",
-                             cmd_str)
+                logger.error("%s failed; Use --nsys_cmd to specify nsys path", cmd_str)
                exit(1)
-            logger.info('generating non-overalapped sum %s', sum_file)
+            logger.info("generating non-overalapped sum %s", sum_file)
            self.gen_nonoverlapped_sum_from_gputrace(nsys_stats_file, sum_file)
        self.is_valid_file(sum_file)
-        logger.info('Finished generating %s', sum_file)
+        logger.info("Finished generating %s", sum_file)
        return sum_file

    def gen_graph(self, in_file, out_dir, title, nsys_cmd, engine_model):
-        """ generates graph and csv file from in_file into out_dir """
+        """generates graph and csv file from in_file into out_dir"""
        # Initialize an empty DataFrame to store combined data
        combined_df = self.pd.DataFrame()
        for idx, (file, engine, model, total_sec) in enumerate(in_file):
            file_dir = os.path.dirname(file)
            file_name = os.path.basename(file)
            if not file_dir:
-                file_dir = '.'
+                file_dir = "."
            sum_file = self.gen_sum_file(file, nsys_cmd)
            # read kernel summary file
            df = self.pd.read_csv(sum_file)
            # annotate kernel to their categories
-            assert engine_model.get(engine), f'engine {engine} unknown'
-            assert engine_model[engine].get(model), f'model {model} unknown'
+            assert engine_model.get(engine), f"engine {engine} unknown"
+            assert engine_model[engine].get(model), f"model {model} unknown"
            # remove nsys-rep from file_name for shorter x-label
-            file_name = file_name.replace('.nsys-rep', '')
-            df['Model_Engine'] = f'{model}_{engine}_{file_name}_{idx}'
+            file_name = file_name.replace(".nsys-rep", "")
+            df["Model_Engine"] = f"{model}_{engine}_{file_name}_{idx}"
            self.anno_gpu_kernname(df, engine_model[engine][model])
            # patch in non-gpu time
-            gpu_sec = round(df['Elapsed Time (sec)'].sum(), 1)
+            gpu_sec = round(df["Elapsed Time (sec)"].sum(), 1)
            total_sec = round(float(total_sec), 1)
            if total_sec < gpu_sec:
                logger.warning(
@@ -256,7 +278,7 @@ class GPUTrace2Graph:
            df = self.pd.concat([df, nongpu_row], ignore_index=True)
            combined_df = self.pd.concat([combined_df, df], ignore_index=True)
        if out_dir is None:
-            out_dir = '.'
+            out_dir = "."
        else:
            os.makedirs(out_dir, exist_ok=True)
        # generate html file
@@ -264,50 +286,59 @@ class GPUTrace2Graph:


 def parse_tuple(s):
-    return tuple(s.split(','))
+    return tuple(s.split(","))


 def main():
-    logging.basicConfig(format=('%(asctime)s - %(levelname)s - %(message)s'),
-                        level=logging.INFO)
+    logging.basicConfig(
+        format=("%(asctime)s - %(levelname)s - %(message)s"), level=logging.INFO
+    )
    parser = argparse.ArgumentParser(
        description=(
-            'Process nsys rep and generate kernel non-overlapped cycles. \n'
-            'Example:\n'
+            "Process nsys rep and generate kernel non-overlapped cycles. \n"
+            "Example:\n"
            "gputrc2graph.py --in_file d1.nsys-rep,vllm,llama,100 \n"
            "d2.nsys-rep,vllm,gpt-oss,102 "
-            "--out_dir results/ --title \"Model=gpt-oss vLLM chart\""),
-        formatter_class=argparse.RawDescriptionHelpFormatter)
+            '--out_dir results/ --title "Model=gpt-oss vLLM chart"'
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )

    # load supported engine_model
    engine_model_supported = load_engine_model()
    # Get a string representation of supported engine/model combinations
-    engine_model_supported_str = ', '.join(
+    engine_model_supported_str = ", ".join(
        f"{engine}:[{', '.join(models.keys())}]"
-        for engine, models in engine_model_supported.items())
+        for engine, models in engine_model_supported.items()
+    )
    parser.add_argument(
-        '--in_file',
+        "--in_file",
        type=parse_tuple,
-        nargs='+',
+        nargs="+",
        help=(
-            'list of (nsys-rep, engine, model, elapsed_nonprofiled_sec) '
-            'separated by space. Elapsed_nonprofiled_sec is runtime without '
-            'profiling used to calculate non-gpu time. Specify 0 to use '
-            'elapsed time from nsys-rep but that might inflate non-gpu time. '
-            f'Available engine:[model] are: {engine_model_supported_str} '
-            f'Example: --infile d1.nsys-rep,vllm,llama,100 '
-            'd2.nsys-rep,vllm,gpt-oss,102'),
-        required=True)
-    parser.add_argument('--out_dir', help=('output dir for result.csv/html'))
-    parser.add_argument('--title', help=('title for html chart'))
-    parser.add_argument('--nsys_cmd',
-                        help=('nsys cmd, e.g. /usr/bin/nsys, Default: nsys'),
-                        default="nsys")
+            "list of (nsys-rep, engine, model, elapsed_nonprofiled_sec) "
+            "separated by space. Elapsed_nonprofiled_sec is runtime without "
+            "profiling used to calculate non-gpu time. Specify 0 to use "
+            "elapsed time from nsys-rep but that might inflate non-gpu time. "
+            f"Available engine:[model] are: {engine_model_supported_str} "
+            f"Example: --infile d1.nsys-rep,vllm,llama,100 "
+            "d2.nsys-rep,vllm,gpt-oss,102"
+        ),
+        required=True,
+    )
+    parser.add_argument("--out_dir", help=("output dir for result.csv/html"))
+    parser.add_argument("--title", help=("title for html chart"))
+    parser.add_argument(
+        "--nsys_cmd",
+        help=("nsys cmd, e.g. /usr/bin/nsys, Default: nsys"),
+        default="nsys",
+    )
    args = parser.parse_args()
    gputrace = GPUTrace2Graph()
-    gputrace.gen_graph(args.in_file, args.out_dir, args.title, args.nsys_cmd,
-                       engine_model_supported)
+    gputrace.gen_graph(
+        args.in_file, args.out_dir, args.title, args.nsys_cmd, engine_model_supported
+    )


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()