Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -1,10 +1,11 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""
|
||||
This generates gpu kernel analysis output from nsys rep. Will call nsys
|
||||
stats -r cuda_gpu_kern_trace, get non-overlapped gpu cycles, then generate
|
||||
csv and html output for analysis
|
||||
This generates gpu kernel analysis output from nsys rep. Will call nsys
|
||||
stats -r cuda_gpu_kern_trace, get non-overlapped gpu cycles, then generate
|
||||
csv and html output for analysis
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
@@ -16,13 +17,13 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# helper data class for annotating kernels
|
||||
def load_engine_model():
|
||||
""" returns engine_model built from all json files in the current dir """
|
||||
"""returns engine_model built from all json files in the current dir"""
|
||||
import glob
|
||||
import json
|
||||
|
||||
engine_model = {}
|
||||
|
||||
json_files = glob.glob(
|
||||
os.path.join(os.path.dirname(__file__) or ".", "*.json"))
|
||||
json_files = glob.glob(os.path.join(os.path.dirname(__file__) or ".", "*.json"))
|
||||
for fname in json_files:
|
||||
with open(fname, encoding="utf-8") as f:
|
||||
engine_model.update(json.load(f))
|
||||
@@ -30,54 +31,54 @@ def load_engine_model():
|
||||
|
||||
|
||||
class GPUTrace2Graph:
|
||||
"""
|
||||
Parses output of nsys report, generates csv and bar chart output
|
||||
"""
|
||||
Parses output of nsys report, generates csv and bar chart output
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
import pandas as pd # avoid importing till needed
|
||||
|
||||
self.pd = pd
|
||||
self.pd.options.mode.copy_on_write = True
|
||||
|
||||
# helper functions for generating trace->summary csvs
|
||||
def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file):
|
||||
logger.info('loading %s', in_file)
|
||||
logger.info("loading %s", in_file)
|
||||
df = self.pd.read_csv(
|
||||
in_file,
|
||||
usecols=['Start (ns)', 'Duration (ns)', 'Device', 'Strm', 'Name'])
|
||||
df['End (ns)'] = df['Start (ns)'] + df['Duration (ns)']
|
||||
in_file, usecols=["Start (ns)", "Duration (ns)", "Device", "Strm", "Name"]
|
||||
)
|
||||
df["End (ns)"] = df["Start (ns)"] + df["Duration (ns)"]
|
||||
df = self.sum_non_overlapping_intervals(df)
|
||||
# get ready to print table with elapsed times per kernel
|
||||
df['Instances'] = 1
|
||||
df_sum = df.groupby('Name', as_index=False).agg({
|
||||
'Elapsed Time (ns)': 'sum',
|
||||
'Duration (ns)': 'sum',
|
||||
'Instances': 'size'
|
||||
})
|
||||
df["Instances"] = 1
|
||||
df_sum = df.groupby("Name", as_index=False).agg(
|
||||
{"Elapsed Time (ns)": "sum", "Duration (ns)": "sum", "Instances": "size"}
|
||||
)
|
||||
|
||||
# generate csv
|
||||
df_sum['Total Time (sec)'] = df_sum['Duration (ns)'] / 1e9
|
||||
df_sum['Elapsed Time (sec)'] = df_sum['Elapsed Time (ns)'] / 1e9
|
||||
df_sum = df_sum.sort_values(by='Elapsed Time (sec)', ascending=False)
|
||||
df_sum[['Elapsed Time (sec)', 'Total Time (sec)', 'Instances',
|
||||
'Name']].to_csv(out_file, index=False)
|
||||
df_sum["Total Time (sec)"] = df_sum["Duration (ns)"] / 1e9
|
||||
df_sum["Elapsed Time (sec)"] = df_sum["Elapsed Time (ns)"] / 1e9
|
||||
df_sum = df_sum.sort_values(by="Elapsed Time (sec)", ascending=False)
|
||||
df_sum[["Elapsed Time (sec)", "Total Time (sec)", "Instances", "Name"]].to_csv(
|
||||
out_file, index=False
|
||||
)
|
||||
|
||||
def sum_non_overlapping_intervals(self, df):
|
||||
"""
|
||||
returns new sorted df with Elapsed Time (ns) column using
|
||||
vectorized operations
|
||||
"""
|
||||
returns new sorted df with Elapsed Time (ns) column using
|
||||
vectorized operations
|
||||
"""
|
||||
logger.info("sorting %s trace records by start time", str(df.shape))
|
||||
|
||||
# Sort by start time and reset index
|
||||
df = df.sort_values(by='Start (ns)').reset_index(drop=True)
|
||||
df = df.sort_values(by="Start (ns)").reset_index(drop=True)
|
||||
|
||||
# Initialize elapsed time as duration
|
||||
df['Elapsed Time (ns)'] = df['Duration (ns)']
|
||||
df["Elapsed Time (ns)"] = df["Duration (ns)"]
|
||||
|
||||
# Get numpy arrays for faster operations
|
||||
starts = df['Start (ns)'].values
|
||||
ends = df['End (ns)'].values
|
||||
starts = df["Start (ns)"].values
|
||||
ends = df["End (ns)"].values
|
||||
|
||||
# Keep track of current interval end
|
||||
current_end = ends[0]
|
||||
@@ -85,16 +86,17 @@ class GPUTrace2Graph:
|
||||
# Update current_end for overlapping intervals
|
||||
for i in range(1, len(df)):
|
||||
if i % display_units == 0:
|
||||
print(f'processing trace: {int(i/len(df) * 100)} %', end="\r")
|
||||
print(f"processing trace: {int(i / len(df) * 100)} %", end="\r")
|
||||
if starts[i] <= current_end:
|
||||
if ends[i] > current_end:
|
||||
# Partial overlap
|
||||
df.iloc[i, df.columns.get_loc('Elapsed Time (ns)'
|
||||
)] = ends[i] - current_end
|
||||
df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = (
|
||||
ends[i] - current_end
|
||||
)
|
||||
current_end = ends[i]
|
||||
else:
|
||||
# Complete overlap
|
||||
df.iloc[i, df.columns.get_loc('Elapsed Time (ns)')] = 0
|
||||
df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = 0
|
||||
else:
|
||||
# No overlap
|
||||
current_end = ends[i]
|
||||
@@ -103,147 +105,167 @@ class GPUTrace2Graph:
|
||||
|
||||
# functions for generating html files
|
||||
def make_html(self, df, output_dir, title):
|
||||
""" make html graph from df """
|
||||
"""make html graph from df"""
|
||||
import plotly.express as px
|
||||
|
||||
if df.empty:
|
||||
return
|
||||
output_name = output_dir + '/result'
|
||||
output_name = output_dir + "/result"
|
||||
if not title:
|
||||
title = 'Model_Engine'
|
||||
x = 'Model_Engine'
|
||||
y = 'Elapsed Time (sec)'
|
||||
color = 'Category'
|
||||
title = "Model_Engine"
|
||||
x = "Model_Engine"
|
||||
y = "Elapsed Time (sec)"
|
||||
color = "Category"
|
||||
""" generate kernel mapping table """
|
||||
# Sort Model_Engine categories by last field after underscore
|
||||
df['Model_Engine'] = self.pd.Categorical(
|
||||
df['Model_Engine'],
|
||||
sorted(df['Model_Engine'].unique(),
|
||||
key=lambda x: x.split('_')[-1]))
|
||||
df[['Model_Engine', color, 'Instances', 'Name',
|
||||
y]].sort_values(by=color).to_csv(f'{output_name}.csv', index=False)
|
||||
graph = px.histogram(df.round(2),
|
||||
x=x,
|
||||
y=y,
|
||||
title=(f'{y} for {title}'),
|
||||
color=color,
|
||||
text_auto=True)
|
||||
df["Model_Engine"] = self.pd.Categorical(
|
||||
df["Model_Engine"],
|
||||
sorted(df["Model_Engine"].unique(), key=lambda x: x.split("_")[-1]),
|
||||
)
|
||||
df[["Model_Engine", color, "Instances", "Name", y]].sort_values(
|
||||
by=color
|
||||
).to_csv(f"{output_name}.csv", index=False)
|
||||
graph = px.histogram(
|
||||
df.round(2),
|
||||
x=x,
|
||||
y=y,
|
||||
title=(f"{y} for {title}"),
|
||||
color=color,
|
||||
text_auto=True,
|
||||
)
|
||||
# wrap x axis labels
|
||||
graph.update_xaxes(automargin=True)
|
||||
graph.write_html(f'{output_name}.html')
|
||||
graph.write_html(f"{output_name}.html")
|
||||
"""
|
||||
Generate data table with columns per Model_Engine into result.html
|
||||
"""
|
||||
pivot_df = df.pivot_table(values='Elapsed Time (sec)',
|
||||
index='Category',
|
||||
columns='Model_Engine',
|
||||
aggfunc='sum',
|
||||
observed=False).round(2)
|
||||
pivot_df = df.pivot_table(
|
||||
values="Elapsed Time (sec)",
|
||||
index="Category",
|
||||
columns="Model_Engine",
|
||||
aggfunc="sum",
|
||||
observed=False,
|
||||
).round(2)
|
||||
# Add sum row at bottom
|
||||
pivot_df.loc['total_elapsed_sec'] = pivot_df.sum()
|
||||
pivot_df.fillna('').to_html('temp.html')
|
||||
with (open(f'{output_name}.html', 'a', encoding='utf-8') as
|
||||
outfile, open('temp.html', encoding='utf-8') as infile):
|
||||
pivot_df.loc["total_elapsed_sec"] = pivot_df.sum()
|
||||
pivot_df.fillna("").to_html("temp.html")
|
||||
with (
|
||||
open(f"{output_name}.html", "a", encoding="utf-8") as outfile,
|
||||
open("temp.html", encoding="utf-8") as infile,
|
||||
):
|
||||
outfile.write(infile.read())
|
||||
os.remove('temp.html')
|
||||
os.remove("temp.html")
|
||||
|
||||
print(f'Finished generating: \n'
|
||||
f' {output_name}.html for stack bar chart \n'
|
||||
f' {output_name}.csv for Kernel-Category mapping')
|
||||
print(
|
||||
f"Finished generating: \n"
|
||||
f" {output_name}.html for stack bar chart \n"
|
||||
f" {output_name}.csv for Kernel-Category mapping"
|
||||
)
|
||||
|
||||
def anno_gpu_kernname(self, df, mapping):
|
||||
""" add "Category" column """
|
||||
"""add "Category" column"""
|
||||
|
||||
def anno_gpu_kernname_helper(name):
|
||||
for kern_name, val in mapping.items():
|
||||
if re.search(kern_name, name):
|
||||
return val
|
||||
|
||||
df['Category'] = df['Name'].apply(anno_gpu_kernname_helper)
|
||||
df["Category"] = df["Name"].apply(anno_gpu_kernname_helper)
|
||||
|
||||
def make_nongpu_row(self, df, nongpu_sec):
|
||||
""" this will append non-gpu time entry at end of df """
|
||||
"""this will append non-gpu time entry at end of df"""
|
||||
nongpu_row = self.pd.DataFrame([df.iloc[-1]])
|
||||
nongpu_row['Category'] = nongpu_row['Name'] = 'CPU(non-GPU)'
|
||||
nongpu_row['Instances'] = 1
|
||||
nongpu_row['Elapsed Time (sec)'] = nongpu_sec
|
||||
return (nongpu_row)
|
||||
nongpu_row["Category"] = nongpu_row["Name"] = "CPU(non-GPU)"
|
||||
nongpu_row["Instances"] = 1
|
||||
nongpu_row["Elapsed Time (sec)"] = nongpu_sec
|
||||
return nongpu_row
|
||||
|
||||
def is_valid_file(self, base_file):
|
||||
""" asserts if base_file is non-existent or is empty """
|
||||
assert os.path.isfile(base_file) and os.path.getsize(base_file) > 0, \
|
||||
f"{base_file} doesn't exist or is empty"
|
||||
"""asserts if base_file is non-existent or is empty"""
|
||||
assert os.path.isfile(base_file) and os.path.getsize(base_file) > 0, (
|
||||
f"{base_file} doesn't exist or is empty"
|
||||
)
|
||||
|
||||
def should_gen_file(self, new_file, base_file):
|
||||
""" figure out if new file should be generated from base_file """
|
||||
"""figure out if new file should be generated from base_file"""
|
||||
self.is_valid_file(base_file)
|
||||
if (os.path.exists(new_file)
|
||||
and (os.path.getmtime(new_file) > os.path.getmtime(base_file))
|
||||
and (os.path.getsize(base_file) > 0)):
|
||||
logger.info('reusing %s', new_file)
|
||||
if (
|
||||
os.path.exists(new_file)
|
||||
and (os.path.getmtime(new_file) > os.path.getmtime(base_file))
|
||||
and (os.path.getsize(base_file) > 0)
|
||||
):
|
||||
logger.info("reusing %s", new_file)
|
||||
return False
|
||||
else:
|
||||
logger.info('generating %s', new_file)
|
||||
logger.info("generating %s", new_file)
|
||||
return True
|
||||
|
||||
def gen_sum_file(self, file, nsys_cmd):
|
||||
"""
|
||||
generates sum file from nsys trace with times per kernel and
|
||||
returns the name of the sum file
|
||||
"""
|
||||
generates sum file from nsys trace with times per kernel and
|
||||
returns the name of the sum file
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
file_dir = os.path.dirname(file)
|
||||
file_name = os.path.basename(file)
|
||||
|
||||
if not file_dir:
|
||||
file_dir = '.'
|
||||
file_dir = "."
|
||||
# Walk through trace and get the total non-overlapped time
|
||||
nsys_stats_file = f'{file_dir}/{file_name}_cuda_gpu_trace.csv'
|
||||
sum_file = f'{file_dir}/{file_name}_cuda_gpu_kernel_tracesum.csv'
|
||||
nsys_stats_file = f"{file_dir}/{file_name}_cuda_gpu_trace.csv"
|
||||
sum_file = f"{file_dir}/{file_name}_cuda_gpu_kernel_tracesum.csv"
|
||||
if self.should_gen_file(nsys_stats_file, file):
|
||||
cmd = [
|
||||
nsys_cmd, 'stats', '-r', 'cuda_gpu_trace', file, '-o',
|
||||
f'{file_dir}/{file_name}'
|
||||
nsys_cmd,
|
||||
"stats",
|
||||
"-r",
|
||||
"cuda_gpu_trace",
|
||||
file,
|
||||
"-o",
|
||||
f"{file_dir}/{file_name}",
|
||||
]
|
||||
cmd_str = ' '.join(cmd)
|
||||
logger.info('+ %s', cmd_str)
|
||||
cmd_str = " ".join(cmd)
|
||||
logger.info("+ %s", cmd_str)
|
||||
# estimate time based on calibrated 240M/min
|
||||
file_size_mb = os.path.getsize(file) / 1e6
|
||||
logger.info(
|
||||
'nsys stats for %.2f MB file expected to take %.2f min',
|
||||
file_size_mb, file_size_mb / 240)
|
||||
"nsys stats for %.2f MB file expected to take %.2f min",
|
||||
file_size_mb,
|
||||
file_size_mb / 240,
|
||||
)
|
||||
try:
|
||||
subprocess.run(cmd, check=True)
|
||||
except Exception:
|
||||
logger.error("%s failed; Use --nsys_cmd to specify nsys path",
|
||||
cmd_str)
|
||||
logger.error("%s failed; Use --nsys_cmd to specify nsys path", cmd_str)
|
||||
exit(1)
|
||||
logger.info('generating non-overalapped sum %s', sum_file)
|
||||
logger.info("generating non-overalapped sum %s", sum_file)
|
||||
self.gen_nonoverlapped_sum_from_gputrace(nsys_stats_file, sum_file)
|
||||
self.is_valid_file(sum_file)
|
||||
logger.info('Finished generating %s', sum_file)
|
||||
logger.info("Finished generating %s", sum_file)
|
||||
return sum_file
|
||||
|
||||
def gen_graph(self, in_file, out_dir, title, nsys_cmd, engine_model):
|
||||
""" generates graph and csv file from in_file into out_dir """
|
||||
"""generates graph and csv file from in_file into out_dir"""
|
||||
# Initialize an empty DataFrame to store combined data
|
||||
combined_df = self.pd.DataFrame()
|
||||
for idx, (file, engine, model, total_sec) in enumerate(in_file):
|
||||
file_dir = os.path.dirname(file)
|
||||
file_name = os.path.basename(file)
|
||||
if not file_dir:
|
||||
file_dir = '.'
|
||||
file_dir = "."
|
||||
sum_file = self.gen_sum_file(file, nsys_cmd)
|
||||
# read kernel summary file
|
||||
df = self.pd.read_csv(sum_file)
|
||||
# annotate kernel to their categories
|
||||
assert engine_model.get(engine), f'engine {engine} unknown'
|
||||
assert engine_model[engine].get(model), f'model {model} unknown'
|
||||
assert engine_model.get(engine), f"engine {engine} unknown"
|
||||
assert engine_model[engine].get(model), f"model {model} unknown"
|
||||
# remove nsys-rep from file_name for shorter x-label
|
||||
file_name = file_name.replace('.nsys-rep', '')
|
||||
df['Model_Engine'] = f'{model}_{engine}_{file_name}_{idx}'
|
||||
file_name = file_name.replace(".nsys-rep", "")
|
||||
df["Model_Engine"] = f"{model}_{engine}_{file_name}_{idx}"
|
||||
self.anno_gpu_kernname(df, engine_model[engine][model])
|
||||
# patch in non-gpu time
|
||||
gpu_sec = round(df['Elapsed Time (sec)'].sum(), 1)
|
||||
gpu_sec = round(df["Elapsed Time (sec)"].sum(), 1)
|
||||
total_sec = round(float(total_sec), 1)
|
||||
if total_sec < gpu_sec:
|
||||
logger.warning(
|
||||
@@ -256,7 +278,7 @@ class GPUTrace2Graph:
|
||||
df = self.pd.concat([df, nongpu_row], ignore_index=True)
|
||||
combined_df = self.pd.concat([combined_df, df], ignore_index=True)
|
||||
if out_dir is None:
|
||||
out_dir = '.'
|
||||
out_dir = "."
|
||||
else:
|
||||
os.makedirs(out_dir, exist_ok=True)
|
||||
# generate html file
|
||||
@@ -264,50 +286,59 @@ class GPUTrace2Graph:
|
||||
|
||||
|
||||
def parse_tuple(s):
|
||||
return tuple(s.split(','))
|
||||
return tuple(s.split(","))
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig(format=('%(asctime)s - %(levelname)s - %(message)s'),
|
||||
level=logging.INFO)
|
||||
logging.basicConfig(
|
||||
format=("%(asctime)s - %(levelname)s - %(message)s"), level=logging.INFO
|
||||
)
|
||||
parser = argparse.ArgumentParser(
|
||||
description=(
|
||||
'Process nsys rep and generate kernel non-overlapped cycles. \n'
|
||||
'Example:\n'
|
||||
"Process nsys rep and generate kernel non-overlapped cycles. \n"
|
||||
"Example:\n"
|
||||
"gputrc2graph.py --in_file d1.nsys-rep,vllm,llama,100 \n"
|
||||
"d2.nsys-rep,vllm,gpt-oss,102 "
|
||||
"--out_dir results/ --title \"Model=gpt-oss vLLM chart\""),
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
'--out_dir results/ --title "Model=gpt-oss vLLM chart"'
|
||||
),
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
|
||||
# load supported engine_model
|
||||
engine_model_supported = load_engine_model()
|
||||
# Get a string representation of supported engine/model combinations
|
||||
engine_model_supported_str = ', '.join(
|
||||
engine_model_supported_str = ", ".join(
|
||||
f"{engine}:[{', '.join(models.keys())}]"
|
||||
for engine, models in engine_model_supported.items())
|
||||
for engine, models in engine_model_supported.items()
|
||||
)
|
||||
parser.add_argument(
|
||||
'--in_file',
|
||||
"--in_file",
|
||||
type=parse_tuple,
|
||||
nargs='+',
|
||||
nargs="+",
|
||||
help=(
|
||||
'list of (nsys-rep, engine, model, elapsed_nonprofiled_sec) '
|
||||
'separated by space. Elapsed_nonprofiled_sec is runtime without '
|
||||
'profiling used to calculate non-gpu time. Specify 0 to use '
|
||||
'elapsed time from nsys-rep but that might inflate non-gpu time. '
|
||||
f'Available engine:[model] are: {engine_model_supported_str} '
|
||||
f'Example: --infile d1.nsys-rep,vllm,llama,100 '
|
||||
'd2.nsys-rep,vllm,gpt-oss,102'),
|
||||
required=True)
|
||||
parser.add_argument('--out_dir', help=('output dir for result.csv/html'))
|
||||
parser.add_argument('--title', help=('title for html chart'))
|
||||
parser.add_argument('--nsys_cmd',
|
||||
help=('nsys cmd, e.g. /usr/bin/nsys, Default: nsys'),
|
||||
default="nsys")
|
||||
"list of (nsys-rep, engine, model, elapsed_nonprofiled_sec) "
|
||||
"separated by space. Elapsed_nonprofiled_sec is runtime without "
|
||||
"profiling used to calculate non-gpu time. Specify 0 to use "
|
||||
"elapsed time from nsys-rep but that might inflate non-gpu time. "
|
||||
f"Available engine:[model] are: {engine_model_supported_str} "
|
||||
f"Example: --infile d1.nsys-rep,vllm,llama,100 "
|
||||
"d2.nsys-rep,vllm,gpt-oss,102"
|
||||
),
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument("--out_dir", help=("output dir for result.csv/html"))
|
||||
parser.add_argument("--title", help=("title for html chart"))
|
||||
parser.add_argument(
|
||||
"--nsys_cmd",
|
||||
help=("nsys cmd, e.g. /usr/bin/nsys, Default: nsys"),
|
||||
default="nsys",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
gputrace = GPUTrace2Graph()
|
||||
gputrace.gen_graph(args.in_file, args.out_dir, args.title, args.nsys_cmd,
|
||||
engine_model_supported)
|
||||
gputrace.gen_graph(
|
||||
args.in_file, args.out_dir, args.title, args.nsys_cmd, engine_model_supported
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user