-
-
Save lettergram/d8f7d9f3d19856d4a0187462445382a0 to your computer and use it in GitHub Desktop.
Generates cProfile for standard dataprofiler usage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import os | |
import sys | |
import json | |
import shutil | |
import pstats | |
import cProfile | |
from functools import wraps | |
def profile(output_file=None, sort_by='cumulative', lines_to_print=None, strip_dirs=False):
    """
    A time profiler decorator.

    From: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    Each call of the decorated function is run under ``cProfile`` and a
    human-readable ``pstats`` report is written to ``output_file``
    (overwriting any previous content).

    Args:
        output_file: str or None. Default is None
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function plus '.prof'
            is used.
        sort_by: str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
        lines_to_print: int or None
            Number of lines to print. Default (None) is for all the lines.
            This is useful in reducing the size of the printout, especially
            that sorting by 'cumulative', the time consuming operations
            are printed toward the top of the file.
        strip_dirs: bool
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout

    Returns:
        A decorator. The decorated function returns whatever the original
        function returns; an exception raised by the original function
        propagates unchanged and no report is written for that call.
    """
    def inner(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            _output_file = output_file or func.__name__ + '.prof'
            pr = cProfile.Profile()
            pr.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Always stop the profiler, even when func raises; otherwise
                # it stays active and interferes with later profiling.
                pr.disable()

            # Only the text report is written. A binary dump to the same
            # path would be overwritten immediately by this open(..., 'w').
            with open(_output_file, 'w') as f:
                ps = pstats.Stats(pr, stream=f)
                if strip_dirs:
                    ps.strip_dirs()
                if isinstance(sort_by, (tuple, list)):
                    ps.sort_stats(*sort_by)
                else:
                    ps.sort_stats(sort_by)
                ps.print_stats(lines_to_print)
            return retval

        return wrapper

    return inner
if __name__ == "__main__":
    # Usage: python <this script> <data_file>
    # Profiles loading the file with dataprofiler and building profiles
    # from it, then collects the generated *.prof reports in one directory.
    if len(sys.argv) < 2:
        sys.exit("usage: python {} <data_file>".format(sys.argv[0]))
    filename = sys.argv[1]

    # Single destination for the reports. The directory that is created and
    # the directory the reports are moved into must be the same one.
    profile_dir = 'cprofile_stats'
    os.makedirs(profile_dir, exist_ok=True)

    import dataprofiler as dp

    # The function names below determine the report file names
    # (Data.prof, Profile_no_labeler.prof, Profile.prof).
    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Data(filename):
        return dp.Data(filename)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile_no_labeler(data):
        profile_options = dp.ProfilerOptions()
        profile_options.set({"data_labeler.is_enabled": False})
        return dp.Profiler(data, profiler_options=profile_options)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile(data):
        return dp.Profiler(data)

    data = Data(filename)
    # Run only for its timing report; the returned profiler is not used.
    Profile_no_labeler(data)
    # Named so that it does not rebind the `profile` decorator.
    full_profile = Profile(data)

    human_readable_report = full_profile.report(report_options={"output_format": "pretty"})
    print(json.dumps(human_readable_report, indent=4))

    # Portable replacement for `mv *.prof <dir>`: no shell involved, and
    # reports left from an earlier run are overwritten.
    for name in os.listdir('.'):
        if name.startswith('.') or not name.endswith('.prof'):
            continue
        if os.path.isfile(name):
            shutil.move(name, os.path.join(profile_dir, name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment