Skip to content

Instantly share code, notes, and snippets.

@FindHao
Last active July 9, 2021 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FindHao/664a881cfa892df34f22e9094a15167c to your computer and use it in GitHub Desktop.
Save FindHao/664a881cfa892df34f22e9094a15167c to your computer and use it in GitHub Desktop.
filter kernel execution time, memory copy traffic, and cudaapi time from nsys report
#!/usr/bin/env python3
import pandas as pd
import os
import re
import argparse
reports_with_path = {}
execute_name = ''
execute_times = 0
prefix_path = ''
output_prefix = ''
def get_execute_name(reports_path, a_execute_name='', output_name=''):
global reports_with_path, execute_name, execute_times, prefix_path, output_prefix
reg = re.compile(r"report(\d+)")
# reports_path always end with /reports/
tmp_report_path = reports_path
if reports_path.endswith('/'):
tmp_report_path = tmp_report_path[:-1]
output_prefix = os.path.basename(tmp_report_path[:-8])
if a_execute_name:
execute_name = a_execute_name
for parent, dirnames, filenames in os.walk(reports_path, followlinks=True):
for filename in filenames:
file_path = os.path.join(parent, filename)
reports_with_path[filename] = file_path
if not execute_name and filename.endswith(".qdrep"):
execute_name = filename[:-6].split("_")[1]
if not prefix_path:
prefix_path = parent
result = reg.findall(filename)
execute_times = max(int(result[0]), execute_times)
if output_name:
output_prefix = output_name
print("execute_name, execute_times, prefix_path, output_prefix")
print(execute_name, execute_times, prefix_path, output_prefix)
def gpu_kernel_time():
k = None
final = pd.DataFrame()
for i in range(1, execute_times+1):
gpukernelsum = prefix_path + \
"/report%d_%s_gpukernsum.csv" % (i, execute_name)
kernel_time = pd.read_csv(gpukernelsum)
need_data = kernel_time[['Name', 'Total Time (ns)']]
if i == 1:
k = need_data
else:
k = pd.merge(k, need_data, how='outer', on=['Name'])
final = pd.DataFrame()
final['Name'] = k.loc[:, 'Name']
value = k.iloc[:, 1:]
mean_avg = value.mean(axis=1)
final['Kenerl Time (ns)'] = mean_avg
final = final.append(
{'Name': 'sum_above', 'Kenerl Time (ns)': final.iloc[:, 1:].sum().tolist()[0]}, ignore_index=True)
final.to_csv(output_prefix + "_kernel_time.csv")
def cuda_api():
k = None
final = pd.DataFrame()
for i in range(1, execute_times+1):
gpukernelsum = prefix_path + \
"/report%d_%s_cudaapisum.csv" % (i, execute_name)
kernel_time = pd.read_csv(gpukernelsum)
need_data = kernel_time[['Name', 'Total Time (ns)']]
if i == 1:
k = need_data
else:
k = pd.merge(k, need_data, how='outer', on=['Name'])
final = pd.DataFrame()
final['Name'] = k.loc[:, 'Name']
value = k.iloc[:, 1:]
mean_avg = value.mean(axis=1)
final['Kenerl Time (ns)'] = mean_avg
final = final.append(
{'Name': 'sum_above', 'Kenerl Time (ns)': final.iloc[:, 1:].sum().tolist()[0]}, ignore_index=True)
final.to_csv(output_prefix + "_cudaapi.csv")
def gpu_mem_size():
k = None
final = pd.DataFrame()
for i in range(1, execute_times+1):
gpukernelsum = prefix_path + \
"/report%d_%s_gpumemsizesum.csv" % (i, execute_name)
try:
kernel_time = pd.read_csv(gpukernelsum)
except FileNotFoundError as e:
print(e)
return
need_data = kernel_time[['Operation', 'Total']]
if i == 1:
k = need_data
else:
k = pd.merge(k, need_data, how='outer', on=['Operation'])
final = pd.DataFrame()
final['Operation'] = k.loc[:, 'Operation']
value = k.iloc[:, 1:]
mean_avg = value.mean(axis=1)
final['Total'] = mean_avg
final = final.append(
{'Operation': 'sum_above', 'Total': final.iloc[:, 1:].sum().tolist()[0]}, ignore_index=True)
final.to_csv(output_prefix + "_memsize.csv")
def gpu_mem_time():
k = None
final = pd.DataFrame()
for i in range(1, execute_times+1):
gpukernelsum = prefix_path + \
"/report%d_%s_gpumemtimesum.csv" % (i, execute_name)
try:
kernel_time = pd.read_csv(gpukernelsum)
except FileNotFoundError as e:
print(e)
return
need_data = kernel_time[['Operation', 'Total Time (ns)']]
if i == 1:
k = need_data
else:
k = pd.merge(k, need_data, how='outer', on=['Operation'])
final = pd.DataFrame()
final['Operation'] = k.loc[:, 'Operation']
value = k.iloc[:, 1:]
mean_avg = value.mean(axis=1)
final['Total'] = mean_avg
final = final.append(
{'Operation': 'sum_above', 'Total': final.iloc[:, 1:].sum().tolist()[0]}, ignore_index=True)
final.to_csv(output_prefix + "_memtime.csv")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--report-path', metavar='The path of nsys reports.',
required=True, dest='report_path', action='store')
parser.add_argument('-e', '--execute_name',
metavar='report1_XXX_gpukernsum.csv, the middle XXX',
required=False, dest='execute_name', action='store')
parser.add_argument('-o', '--output',
metavar='The output filename you want to set',
required=False, dest='output_name', action='store')
args = parser.parse_args()
if not os.path.exists(args.report_path):
print("Could not find reports at ", args.report_path)
exit(-1)
get_execute_name(args.report_path, args.execute_name, args.output_name)
gpu_kernel_time()
cuda_api()
gpu_mem_time()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment