lettergram/profile-dataprofiler.py Secret

## profile-dataprofiler.py
import io
import os
import sys
import json

import shutil

import pstats
import cProfile
from functools import wraps

def profile(output_file=None, sort_by='cumulative', lines_to_print=None, strip_dirs=False):
    """
    Build a decorator that times every call of a function with cProfile.

    From: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89

    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    Args:
        output_file: str or None. Default is None
            Path of the text report. If only a file name is given, it is
            saved in the current directory.
            If it's None, ``<decorated function name>.prof`` is used.
        sort_by: str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
        lines_to_print: int or None
            Number of lines to print. Default (None) is for all the lines.
            This is useful in reducing the size of the printout, especially
            when sorting by 'cumulative', since the time consuming operations
            are printed toward the top of the file.
        strip_dirs: bool
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns; after each successful call a text report is
        written to the output file. If the wrapped function raises, the
        exception propagates and no report is written.
    """
    def inner(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            _output_file = output_file or func.__name__ + '.prof'
            pr = cProfile.Profile()
            pr.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Always stop profiling, even when func raises: a profiler
                # left enabled keeps tracing the rest of the program and can
                # prevent a later Profile() from being enabled.
                pr.disable()
            # Only the human-readable report is written. A binary
            # dump_stats() to the same path would be truncated by this
            # open(..., 'w') and is therefore not performed.
            with open(_output_file, 'w') as f:
                ps = pstats.Stats(pr, stream=f)
                if strip_dirs:
                    ps.strip_dirs()
                if isinstance(sort_by, (tuple, list)):
                    ps.sort_stats(*sort_by)
                else:
                    ps.sort_stats(sort_by)
                ps.print_stats(lines_to_print)
            return retval
        return wrapper
    return inner


if __name__ == "__main__":
    # Script entry point: profile loading a data file and building two
    # DataProfiler profiles from it, print the last profile's report as JSON,
    # then gather the generated *.prof reports into one directory.

    if len(sys.argv) < 2:
        sys.exit("usage: {} <data-file>".format(sys.argv[0]))
    filename = sys.argv[1]

    profile_dir = 'profile_stats/'

    # Make directory if none exists
    os.makedirs(profile_dir, exist_ok=True)

    import dataprofiler as dp

    # Each helper is wrapped by the `profile` decorator, which is called
    # without output_file and so names its report after the function
    # (Data.prof, Profile_no_labeler.prof, Profile.prof) in the current
    # directory.
    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Data(filename):
        return dp.Data(filename)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile_no_labeler(data):
        # Presumably switches the data labeler off so profiling cost can be
        # compared with and without it -- confirm against DataProfiler docs.
        profile_options = dp.ProfilerOptions()
        profile_options.set({"data_labeler.is_enabled": False})
        return dp.Profiler(data, profiler_options=profile_options)

    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
    def Profile(data):
        return dp.Profiler(data)

    data = Data(filename)
    # Run only for its timing report; the resulting profiler is not used.
    Profile_no_labeler(data)
    # Kept under a distinct name so the `profile` decorator is not shadowed.
    full_profile = Profile(data)

    human_readable_report = full_profile.report(report_options={"output_format": "pretty"})
    print(json.dumps(human_readable_report, indent=4))

    # Move the reports into the directory created above (mirrors the shell
    # glob `*.prof`, which skips dot-files), without going through a shell.
    for report_name in sorted(os.listdir('.')):
        if report_name.endswith('.prof') and not report_name.startswith('.'):
            shutil.move(report_name, os.path.join(profile_dir, report_name))
	import io
	import os
	import sys
	import json

	import shutil

	import pstats
	import cProfile
	from functools import wraps

	def profile(output_file=None, sort_by='cumulative', lines_to_print=None, strip_dirs=False):
	    """
	    Build a decorator that times every call of a function with cProfile.

	    From: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89

	    Inspired by and modified the profile decorator of Giampaolo Rodola:
	    http://code.activestate.com/recipes/577817-profile-decorator/

	    Args:
	        output_file: str or None. Default is None
	            Path of the text report. If only a file name is given, it is
	            saved in the current directory.
	            If it's None, ``<decorated function name>.prof`` is used.
	        sort_by: str or SortKey enum or tuple/list of str/SortKey enum
	            Sorting criteria for the Stats object.
	            For a list of valid string and SortKey refer to:
	            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
	        lines_to_print: int or None
	            Number of lines to print. Default (None) is for all the lines.
	            This is useful in reducing the size of the printout, especially
	            when sorting by 'cumulative', since the time consuming operations
	            are printed toward the top of the file.
	        strip_dirs: bool
	            Whether to remove the leading path info from file names.
	            This is also useful in reducing the size of the printout.

	    Returns:
	        A decorator. The decorated function returns whatever the wrapped
	        function returns; after each successful call a text report is
	        written to the output file. If the wrapped function raises, the
	        exception propagates and no report is written.
	    """
	    def inner(func):
	        @wraps(func)
	        def wrapper(*args, **kwargs):
	            _output_file = output_file or func.__name__ + '.prof'
	            pr = cProfile.Profile()
	            pr.enable()
	            try:
	                retval = func(*args, **kwargs)
	            finally:
	                # Always stop profiling, even when func raises: a profiler
	                # left enabled keeps tracing the rest of the program and can
	                # prevent a later Profile() from being enabled.
	                pr.disable()
	            # Only the human-readable report is written. A binary
	            # dump_stats() to the same path would be truncated by this
	            # open(..., 'w') and is therefore not performed.
	            with open(_output_file, 'w') as f:
	                ps = pstats.Stats(pr, stream=f)
	                if strip_dirs:
	                    ps.strip_dirs()
	                if isinstance(sort_by, (tuple, list)):
	                    ps.sort_stats(*sort_by)
	                else:
	                    ps.sort_stats(sort_by)
	                ps.print_stats(lines_to_print)
	            return retval
	        return wrapper
	    return inner


	if __name__ == "__main__":
	    # Script entry point: profile loading a data file and building two
	    # DataProfiler profiles from it, print the last profile's report as
	    # JSON, then gather the generated *.prof reports into one directory.

	    if len(sys.argv) < 2:
	        sys.exit("usage: {} <data-file>".format(sys.argv[0]))
	    filename = sys.argv[1]

	    profile_dir = 'profile_stats/'

	    # Make directory if none exists
	    os.makedirs(profile_dir, exist_ok=True)

	    import dataprofiler as dp

	    # Each helper is wrapped by the `profile` decorator, which is called
	    # without output_file and so names its report after the function
	    # (Data.prof, Profile_no_labeler.prof, Profile.prof) in the current
	    # directory.
	    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
	    def Data(filename):
	        return dp.Data(filename)

	    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
	    def Profile_no_labeler(data):
	        # Presumably switches the data labeler off so profiling cost can be
	        # compared with and without it -- confirm against DataProfiler docs.
	        profile_options = dp.ProfilerOptions()
	        profile_options.set({"data_labeler.is_enabled": False})
	        return dp.Profiler(data, profiler_options=profile_options)

	    @profile(sort_by='tottime', lines_to_print=20, strip_dirs=True)
	    def Profile(data):
	        return dp.Profiler(data)

	    data = Data(filename)
	    # Run only for its timing report; the resulting profiler is not used.
	    Profile_no_labeler(data)
	    # Kept under a distinct name so the `profile` decorator is not shadowed.
	    full_profile = Profile(data)

	    human_readable_report = full_profile.report(report_options={"output_format": "pretty"})
	    print(json.dumps(human_readable_report, indent=4))

	    # Move the reports into the directory created above (mirrors the shell
	    # glob `*.prof`, which skips dot-files), without going through a shell.
	    for report_name in sorted(os.listdir('.')):
	        if report_name.endswith('.prof') and not report_name.startswith('.'):
	            shutil.move(report_name, os.path.join(profile_dir, report_name))