Skip to content

Instantly share code, notes, and snippets.

@lettergram
Last active March 22, 2021 01:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lettergram/d8f7d9f3d19856d4a0187462445382a0 to your computer and use it in GitHub Desktop.
Save lettergram/d8f7d9f3d19856d4a0187462445382a0 to your computer and use it in GitHub Desktop.
Generates cProfile reports for standard dataprofiler usage.
import io
import os
import sys
import json
import shutil
import pstats
import cProfile
from functools import wraps
def profile(output_file=None, sort_by='cumulative', lines_to_print=None, strip_dirs=False):
    """
    Build a decorator that time-profiles a function with cProfile.

    Each call of the decorated function runs under a fresh
    ``cProfile.Profile``; when the call returns, a human-readable
    ``pstats`` report is written to ``output_file`` (the file is
    overwritten on every call).

    From: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    Args:
        output_file: str or None. Default is None
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function plus '.prof'
            is used.
        sort_by: str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
        lines_to_print: int or None
            Number of lines to print. Default (None) is for all the lines.
            This is useful in reducing the size of the printout, especially
            that sorting by 'cumulative', the time consuming operations
            are printed toward the top of the file.
        strip_dirs: bool
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout

    Returns:
        A decorator. The decorated function returns whatever the original
        function returns; an exception raised by the original function
        propagates unchanged and no report is written for that call.
    """
    def inner(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            _output_file = output_file or func.__name__ + '.prof'
            pr = cProfile.Profile()
            pr.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Always stop profiling, even when func raises; a profiler
                # left enabled would keep tracing unrelated code and can
                # block later profilers from being enabled.
                pr.disable()

            # Only the text report is written. A binary dump_stats() to the
            # same path would be truncated by this open() immediately.
            with open(_output_file, 'w') as f:
                ps = pstats.Stats(pr, stream=f)
                if strip_dirs:
                    ps.strip_dirs()
                if isinstance(sort_by, (tuple, list)):
                    ps.sort_stats(*sort_by)
                else:
                    ps.sort_stats(sort_by)
                ps.print_stats(lines_to_print)
            return retval
        return wrapper
    return inner
if __name__ == "__main__":
    # Script entry point: profile loading a data file and building
    # dataprofiler profiles from it, print the pretty report as JSON, then
    # collect the generated '*.prof' reports into profile_dir.
    if len(sys.argv) < 2:
        sys.exit("usage: python {} <data-file>".format(sys.argv[0]))
    filename = sys.argv[1]

    # Single source of truth for where the reports end up; the same
    # directory is created here and used as the move target below.
    profile_dir = 'profile_stats/'
    os.makedirs(profile_dir, exist_ok=True)

    # Imported here so the import itself only happens when run as a script.
    import dataprofiler as dp

    # NOTE: the function names below double as the report file names
    # ('Data.prof', 'Profile_no_labeler.prof', 'Profile.prof'), because the
    # decorator defaults output_file to '<function name>.prof'.
    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Data(filename):
        return dp.Data(filename)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile_no_labeler(data):
        # Option key suggests this turns the data labeler off -- confirm
        # against the dataprofiler documentation.
        profile_options = dp.ProfilerOptions()
        profile_options.set({"data_labeler.is_enabled": False})
        return dp.Profiler(data, profiler_options=profile_options)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile(data):
        return dp.Profiler(data)

    data = Data(filename)
    # Run only for its timing report; the resulting profiler is not needed.
    Profile_no_labeler(data)
    # Named 'profiler' so the 'profile' decorator is not rebound.
    profiler = Profile(data)

    human_readable_report = profiler.report(report_options={"output_format": "pretty"})
    print(json.dumps(human_readable_report, indent=4))

    # Move every report from the current directory into profile_dir.
    # A full destination path is given so an existing report from an
    # earlier run is replaced instead of raising.
    for name in os.listdir('.'):
        if name.endswith('.prof') and not name.startswith('.') and os.path.isfile(name):
            shutil.move(name, os.path.join(profile_dir, name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment