tomasdelvechio/comparacionTecnicasParaGenerarFilename.ipynb Secret

## comparacionTecnicasParaGenerarFilename.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              comparacionTecnicasParaGenerarFilename.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## timeit_plot.py

import timeit
from collections import defaultdict
from itertools import product
import heapq
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter


def timeit_compare(funcs, inputs, setups='pass', **kwargs):
    """Compares speed of code snippets across input conditions.

    'funcs' should be a list of statements expressed as strings that
    contain str.format placeholders ({0}, {1}, ...). Each snippet is
    formatted with every combination of values derived from 'inputs'
    and timed with timeit.

    'inputs' is either a list of iterables (one per placeholder) or a
    flat list of values for a single placeholder; it is expanded by
    get_conditions().

    'setups' selects the timeit setup statement used for each snippet:
        'pass'       - no setup (e.g. for built-ins like join)
        'main'       - import each function from __main__; the name is
                       the text before the first "(" of the snippet
        list/tuple   - one setup string per entry of 'funcs'
        other string - one setup statement shared by every snippet

    Other inputs:
        'number' - the number of times to run each snippet (a timeit
                   option, default 100000)
        'print_conditions' - if True, print the combinations of input
                   conditions before timing starts

    >>> functions = ['"-".join(str(n) for n in range({0}))',
    ...              '"-".join([str(n) for n in range({0})])',
    ...              '"-".join(map(str, range({0})))']
    >>> data = timeit_compare(functions, [range(10,101,10)], number=10000)
    testing "-".join(str(n) for n in range({0}))...
    testing "-".join([str(n) for n in range({0})])...
    testing "-".join(map(str, range({0})))...

    Returns a defaultdict that has the snippet strings as keys; each
    value is a list of rows [condition values..., timeit result].
    """
    number = kwargs.get('number', 100000)
    print_conditions = kwargs.get('print_conditions', False)
    performance = defaultdict(list)

    if isinstance(setups, (list, tuple)):
        # user specifies their own setups, one per entry of funcs
        setups = list(setups)
    elif setups == 'main':
        # uniformly import every function from the local environment;
        # partition() keeps the whole name when a snippet has no "("
        setups = ["from __main__ import " + f.partition("(")[0].strip()
                  for f in funcs]
    else:
        # 'pass' (no setup) or any other single statement is shared by
        # all snippets instead of being indexed character by character
        setups = [setups] * len(funcs)

    # convert the input ranges to a set of conditions
    conditions = get_conditions(inputs)
    if print_conditions:
        # conditions is a list, so it must be converted before joining
        print("conditions: " + str(conditions))

    for i, f in enumerate(funcs):
        # single-argument print(...) behaves the same on Python 2 and 3
        print("testing " + f + "...")
        for value in conditions:
            statement = f.format(*value)
            elapsed = timeit.Timer(statement, setup=setups[i]).timeit(number=number)
            performance[f].append(list(value) + [elapsed])
    return performance


def get_conditions(inputs):
    """Converts conditions for individual variables into an
    exhaustive list of combinations for timeit_compare().

    If the first element of 'inputs' is a non-string iterable, 'inputs'
    is treated as one iterable per variable and every combination is
    returned as a tuple (itertools.product). Otherwise 'inputs' is a
    flat list of values for a single variable: each scalar is wrapped
    in a one-element list so the caller can unpack it with *value,
    while list/tuple elements are passed through unchanged.

    Returns an empty list when 'inputs' is empty.
    """
    inputs = list(inputs)
    if not inputs:
        # nothing to combine; avoids an IndexError on inputs[0]
        return []
    first = inputs[0]
    # Strings only expose __iter__ on Python 3; exclude them explicitly so
    # a flat list of strings is handled as scalars on both major versions.
    if hasattr(first, '__iter__') and not isinstance(first, (str, bytes)):
        # itertools.product summarizes all combinations of ordered conditions
        return list(product(*inputs))
    # a bare scalar cannot be unpacked by the timer, so wrap it in a list
    return [n if isinstance(n, (list, tuple)) else [n] for n in inputs]


# TODO: function to filter a large dataset
# TODO: filter and options to plot 2 and 3 grams in different colors
#       on the same chart


def timeit_plot2D(data, xlabel='xlabel', title='title', **kwargs):
    """Plots the results from a defaultdict returned by timeit_compare.

    Each function will be plotted as a different series.

    timeit_compare may test many conditions, and the order of the conditions
    in the results data can be understood from the string substitutions
    noted in the keys of the defaultdict. By default series=0 means
    that the first condition is used for the x axis, but this can be
    changed to plot any of the testing conditions available.

    Other inputs:
        'series' - index of the condition column to plot; for
                   style='bubble' a pair of indices (default (0, 1))
        'style'  - 'line' (default), 'scatter' or 'bubble'; in a bubble
                   chart the marker area is size * time
        'size'   - marker size for scatter/bubble (default 500)
        'ylabel' - y axis label (default 'time')
        'cmap'   - colormap name used to colour the series
        'lloc'   - legend location passed to ax.legend (default 2)

    Returns the matplotlib figure.
    """
    style = kwargs.get('style', 'line')
    # a bubble chart needs two condition columns, so the scalar default
    # that suits the other styles cannot be used there
    series = kwargs.get('series', (0, 1) if style == 'bubble' else 0)
    size = kwargs.get('size', 500)
    ylabel = kwargs.get('ylabel', 'time')
    cmap = kwargs.get('cmap', 'rainbow')
    lloc = kwargs.get('lloc', 2)
    # set color scheme: one evenly spaced colormap sample per function
    colors = iter(plt.get_cmap(cmap)(np.linspace(0, 1, len(data))))
    fig, ax = plt.subplots()
    labels = []
    for k, v in data.items():
        # transpose the rows [x, y, z]... into ([x...], [y...], [z...]);
        # list() keeps the result indexable on Python 3
        columns = list(zip(*v))
        color = next(colors)
        # color= (not c=) so a single RGBA value is never mistaken for
        # per-point data when there happen to be 3 or 4 points
        if style == 'scatter':
            ax.scatter(columns[series], columns[-1], s=size, color=color, alpha=.75)
        elif style == 'bubble':
            x, y, z = columns[series[0]], columns[series[1]], columns[-1]
            ax.scatter(x, y, s=[size*i for i in z], color=color, alpha=.5)
        else:
            ax.plot(columns[series], columns[-1], color=color, lw=2)
        labels.append(substitute_titles(k, series))
    # TODO: BUG: no way to set other parameters manually (README fig2)
    ax.legend(labels, loc=lloc)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True)
    return fig


def timeit_plot3D(data, xlabel='xlabel', ylabel='ylabel', **kwargs):
    """3D plot of timeit data, one chart per function.

    'data' is the mapping returned by timeit_compare. 'series' (default
    (0, 1)) picks the two condition columns used for the x and y axes;
    the last column (the timing) is the surface height. 'cmap' sets the
    surface colormap (default cm.coolwarm).

    Returns a list with one matplotlib figure per key of 'data'.
    """
    figs = []
    series = kwargs.get('series', (0, 1))
    cmap = kwargs.get('cmap', cm.coolwarm)
    for k, v in data.items():
        # list() keeps the transposed columns indexable on Python 3
        columns = list(zip(*v))
        X, Y, Z = columns[series[0]], columns[series[1]], columns[-1]
        # Place every timing by the rank of its x and y value instead of
        # reshaping by an estimated step size: this is independent of row
        # order (so reversed series work), of the spacing between values
        # and of axes with a single value.
        xs, xi = np.unique(X, return_inverse=True)
        ys, yi = np.unique(Y, return_inverse=True)
        Zgrid = np.full((len(xs), len(ys)), np.nan)
        # NOTE: if several rows share one (x, y) pair (more than two
        # conditions were timed) only one of them ends up in the grid
        Zgrid[xi, yi] = Z
        Xgrid, Ygrid = np.meshgrid(xs, ys, indexing='ij')
        fig = plt.figure()
        # fig.gca(projection=...) is no longer accepted by matplotlib
        ax = fig.add_subplot(111, projection='3d')
        surf = ax.plot_surface(Xgrid, Ygrid, Zgrid, rstride=1, cstride=1,
                               cmap=cmap, linewidth=0, antialiased=False)
        ax.zaxis.set_major_locator(LinearLocator(10))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(substitute_titles(k, series))
        fig.colorbar(surf, shrink=0.5, aspect=5)
        figs.append(fig)
    return figs


def timeit_heatmap(data, xlabel='xlabel', ylabel='ylabel', **kwargs):
    """Heatmap plot of timeit data, one chart per function.

    'data' is the mapping returned by timeit_compare. 'series' (default
    (0, 1)) picks the two condition columns used for the x and y axes;
    the last column (the timing) sets the cell colour. 'cmap' sets the
    colormap (default cm.coolwarm).

    Returns a list with one matplotlib figure per key of 'data'.
    """
    figs = []
    series = kwargs.get('series', (0, 1))
    cmap = kwargs.get('cmap', cm.coolwarm)
    for k, v in data.items():
        # list() keeps the transposed columns indexable on Python 3
        columns = list(zip(*v))
        X, Y, Z = columns[series[0]], columns[series[1]], columns[-1]
        extent = [min(X), max(X), min(Y), max(Y)]
        # Place every timing by the rank of its x and y value instead of
        # reshaping by an estimated step size: this is independent of row
        # order (so reversed series work) and of the spacing of the values.
        xs, xi = np.unique(X, return_inverse=True)
        ys, yi = np.unique(Y, return_inverse=True)
        grid = np.full((len(xs), len(ys)), np.nan)
        # NOTE: if several rows share one (x, y) pair (more than two
        # conditions were timed) only one of them ends up in the grid
        grid[xi, yi] = Z
        # imshow draws row 0 at the top: transpose so rows follow y, then
        # flip so the largest y value is the top row
        image = grid.T[::-1]
        fig, ax = plt.subplots()
        hmap = ax.imshow(image, extent=extent, cmap=cmap, interpolation='nearest')
        fig.colorbar(hmap).set_label("time")
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(substitute_titles(k, series))
        figs.append(fig)
    return figs

def substitute_titles(label, series):
    """Replaces the format placeholders in 'label' with axis names.

    'series' is either a single condition index or a sequence of them.
    The placeholder "{n}" of the first index becomes "x", the second
    "y" and the third "z"; any further indices are left untouched.

    Returns the relabelled string.
    """
    ordered_axes = ["x", "y", "z"]
    try:
        # zip stops after the three available axis names
        pairs = list(zip(series, ordered_axes))
    except TypeError:
        # a bare index such as 0 is not iterable: it names the x axis
        pairs = [(series, ordered_axes[0])]
    for index, axis in pairs:
        label = label.replace("{" + str(index) + "}", axis)
    return label

	import timeit
	from collections import defaultdict
	from itertools import product
	import heapq
	import numpy as np
	import matplotlib.pyplot as plt
	import matplotlib.cm as cm
	from mpl_toolkits.mplot3d import Axes3D
	from matplotlib.ticker import LinearLocator, FormatStrFormatter


	def timeit_compare(funcs, inputs, setups='pass', **kwargs):
		"""Compares speed of code snippets across input conditions.

		'funcs' should be a list of statements expressed as strings that
		contain str.format placeholders ({0}, {1}, ...). Each snippet is
		formatted with every combination of values derived from 'inputs'
		and timed with timeit.

		'inputs' is either a list of iterables (one per placeholder) or a
		flat list of values for a single placeholder; it is expanded by
		get_conditions().

		'setups' selects the timeit setup statement used for each snippet:
		'pass'       - no setup (e.g. for built-ins like join)
		'main'       - import each function from __main__; the name is
		               the text before the first "(" of the snippet
		list/tuple   - one setup string per entry of 'funcs'
		other string - one setup statement shared by every snippet

		Other inputs:
		'number' - the number of times to run each snippet (a timeit
		           option, default 100000)
		'print_conditions' - if True, print the combinations of input
		           conditions before timing starts

		>>> functions = ['"-".join(str(n) for n in range({0}))',
		...              '"-".join([str(n) for n in range({0})])',
		...              '"-".join(map(str, range({0})))']
		>>> data = timeit_compare(functions, [range(10,101,10)], number=10000)
		testing "-".join(str(n) for n in range({0}))...
		testing "-".join([str(n) for n in range({0})])...
		testing "-".join(map(str, range({0})))...

		Returns a defaultdict that has the snippet strings as keys; each
		value is a list of rows [condition values..., timeit result].
		"""
		number = kwargs.get('number', 100000)
		print_conditions = kwargs.get('print_conditions', False)
		performance = defaultdict(list)

		if isinstance(setups, (list, tuple)):
			# user specifies their own setups, one per entry of funcs
			setups = list(setups)
		elif setups == 'main':
			# uniformly import every function from the local environment;
			# partition() keeps the whole name when a snippet has no "("
			setups = ["from __main__ import " + f.partition("(")[0].strip()
				for f in funcs]
		else:
			# 'pass' (no setup) or any other single statement is shared by
			# all snippets instead of being indexed character by character
			setups = [setups] * len(funcs)

		# convert the input ranges to a set of conditions
		conditions = get_conditions(inputs)
		if print_conditions:
			# conditions is a list, so it must be converted before joining
			print("conditions: " + str(conditions))

		for i, f in enumerate(funcs):
			# single-argument print(...) behaves the same on Python 2 and 3
			print("testing " + f + "...")
			for value in conditions:
				statement = f.format(*value)
				elapsed = timeit.Timer(statement, setup=setups[i]).timeit(number=number)
				performance[f].append(list(value) + [elapsed])
		return performance


	def get_conditions(inputs):
		"""Converts conditions for individual variables into an
		exhaustive list of combinations for timeit_compare().

		If the first element of 'inputs' is a non-string iterable, 'inputs'
		is treated as one iterable per variable and every combination is
		returned as a tuple (itertools.product). Otherwise 'inputs' is a
		flat list of values for a single variable: each scalar is wrapped
		in a one-element list so the caller can unpack it with *value,
		while list/tuple elements are passed through unchanged.

		Returns an empty list when 'inputs' is empty.
		"""
		inputs = list(inputs)
		if not inputs:
			# nothing to combine; avoids an IndexError on inputs[0]
			return []
		first = inputs[0]
		# Strings only expose __iter__ on Python 3; exclude them explicitly so
		# a flat list of strings is handled as scalars on both major versions.
		if hasattr(first, '__iter__') and not isinstance(first, (str, bytes)):
			# itertools.product summarizes all combinations of ordered conditions
			return list(product(*inputs))
		# a bare scalar cannot be unpacked by the timer, so wrap it in a list
		return [n if isinstance(n, (list, tuple)) else [n] for n in inputs]


	# TODO: function to filter a large dataset
	# TODO: filter and options to plot 2 and 3 grams in different colors
	# on the same chart


	def timeit_plot2D(data, xlabel='xlabel', title='title', **kwargs):
		"""Plots the results from a defaultdict returned by timeit_compare.

		Each function will be plotted as a different series.

		timeit_compare may test many conditions, and the order of the conditions
		in the results data can be understood from the string substitutions
		noted in the keys of the defaultdict. By default series=0 means
		that the first condition is used for the x axis, but this can be
		changed to plot any of the testing conditions available.

		Other inputs:
		'series' - index of the condition column to plot; for
		           style='bubble' a pair of indices (default (0, 1))
		'style'  - 'line' (default), 'scatter' or 'bubble'; in a bubble
		           chart the marker area is size * time
		'size'   - marker size for scatter/bubble (default 500)
		'ylabel' - y axis label (default 'time')
		'cmap'   - colormap name used to colour the series
		'lloc'   - legend location passed to ax.legend (default 2)

		Returns the matplotlib figure.
		"""
		style = kwargs.get('style', 'line')
		# a bubble chart needs two condition columns, so the scalar default
		# that suits the other styles cannot be used there
		series = kwargs.get('series', (0, 1) if style == 'bubble' else 0)
		size = kwargs.get('size', 500)
		ylabel = kwargs.get('ylabel', 'time')
		cmap = kwargs.get('cmap', 'rainbow')
		lloc = kwargs.get('lloc', 2)
		# set color scheme: one evenly spaced colormap sample per function
		colors = iter(plt.get_cmap(cmap)(np.linspace(0, 1, len(data))))
		fig, ax = plt.subplots()
		labels = []
		for k, v in data.items():
			# transpose the rows [x, y, z]... into ([x...], [y...], [z...]);
			# list() keeps the result indexable on Python 3
			columns = list(zip(*v))
			color = next(colors)
			# color= (not c=) so a single RGBA value is never mistaken for
			# per-point data when there happen to be 3 or 4 points
			if style == 'scatter':
				ax.scatter(columns[series], columns[-1], s=size, color=color, alpha=.75)
			elif style == 'bubble':
				x, y, z = columns[series[0]], columns[series[1]], columns[-1]
				ax.scatter(x, y, s=[size*i for i in z], color=color, alpha=.5)
			else:
				ax.plot(columns[series], columns[-1], color=color, lw=2)
			labels.append(substitute_titles(k, series))
		# TODO: BUG: no way to set other parameters manually (README fig2)
		ax.legend(labels, loc=lloc)
		ax.set_xlabel(xlabel)
		ax.set_ylabel(ylabel)
		ax.set_title(title)
		ax.grid(True)
		return fig


	def timeit_plot3D(data, xlabel='xlabel', ylabel='ylabel', **kwargs):
		"""3D plot of timeit data, one chart per function.

		'data' is the mapping returned by timeit_compare. 'series' (default
		(0, 1)) picks the two condition columns used for the x and y axes;
		the last column (the timing) is the surface height. 'cmap' sets the
		surface colormap (default cm.coolwarm).

		Returns a list with one matplotlib figure per key of 'data'.
		"""
		figs = []
		series = kwargs.get('series', (0, 1))
		cmap = kwargs.get('cmap', cm.coolwarm)
		for k, v in data.items():
			# list() keeps the transposed columns indexable on Python 3
			columns = list(zip(*v))
			X, Y, Z = columns[series[0]], columns[series[1]], columns[-1]
			# Place every timing by the rank of its x and y value instead of
			# reshaping by an estimated step size: this is independent of row
			# order (so reversed series work), of the spacing between values
			# and of axes with a single value.
			xs, xi = np.unique(X, return_inverse=True)
			ys, yi = np.unique(Y, return_inverse=True)
			Zgrid = np.full((len(xs), len(ys)), np.nan)
			# NOTE: if several rows share one (x, y) pair (more than two
			# conditions were timed) only one of them ends up in the grid
			Zgrid[xi, yi] = Z
			Xgrid, Ygrid = np.meshgrid(xs, ys, indexing='ij')
			fig = plt.figure()
			# fig.gca(projection=...) is no longer accepted by matplotlib
			ax = fig.add_subplot(111, projection='3d')
			surf = ax.plot_surface(Xgrid, Ygrid, Zgrid, rstride=1, cstride=1,
				cmap=cmap, linewidth=0, antialiased=False)
			ax.zaxis.set_major_locator(LinearLocator(10))
			ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
			ax.set_xlabel(xlabel)
			ax.set_ylabel(ylabel)
			ax.set_title(substitute_titles(k, series))
			fig.colorbar(surf, shrink=0.5, aspect=5)
			figs.append(fig)
		return figs


	def timeit_heatmap(data, xlabel='xlabel', ylabel='ylabel', **kwargs):
		"""Heatmap plot of timeit data, one chart per function.

		'data' is the mapping returned by timeit_compare. 'series' (default
		(0, 1)) picks the two condition columns used for the x and y axes;
		the last column (the timing) sets the cell colour. 'cmap' sets the
		colormap (default cm.coolwarm).

		Returns a list with one matplotlib figure per key of 'data'.
		"""
		figs = []
		series = kwargs.get('series', (0, 1))
		cmap = kwargs.get('cmap', cm.coolwarm)
		for k, v in data.items():
			# list() keeps the transposed columns indexable on Python 3
			columns = list(zip(*v))
			X, Y, Z = columns[series[0]], columns[series[1]], columns[-1]
			extent = [min(X), max(X), min(Y), max(Y)]
			# Place every timing by the rank of its x and y value instead of
			# reshaping by an estimated step size: this is independent of row
			# order (so reversed series work) and of the spacing of the values.
			xs, xi = np.unique(X, return_inverse=True)
			ys, yi = np.unique(Y, return_inverse=True)
			grid = np.full((len(xs), len(ys)), np.nan)
			# NOTE: if several rows share one (x, y) pair (more than two
			# conditions were timed) only one of them ends up in the grid
			grid[xi, yi] = Z
			# imshow draws row 0 at the top: transpose so rows follow y, then
			# flip so the largest y value is the top row
			image = grid.T[::-1]
			fig, ax = plt.subplots()
			hmap = ax.imshow(image, extent=extent, cmap=cmap, interpolation='nearest')
			fig.colorbar(hmap).set_label("time")
			ax.set_xlabel(xlabel)
			ax.set_ylabel(ylabel)
			ax.set_title(substitute_titles(k, series))
			figs.append(fig)
		return figs

	def substitute_titles(label, series):
		"""Replaces the format placeholders in 'label' with axis names.

		'series' is either a single condition index or a sequence of them.
		The placeholder "{n}" of the first index becomes "x", the second
		"y" and the third "z"; any further indices are left untouched.

		Returns the relabelled string.
		"""
		ordered_axes = ["x", "y", "z"]
		try:
			# zip stops after the three available axis names
			pairs = list(zip(series, ordered_axes))
		except TypeError:
			# a bare index such as 0 is not iterable: it names the x axis
			pairs = [(series, ordered_axes[0])]
		for index, axis in pairs:
			label = label.replace("{" + str(index) + "}", axis)
		return label