Last active
October 20, 2016 19:15
-
-
Save dhermes/306a390aa688f8322504819afaefb7a1 to your computer and use it in GitHub Desktop.
Is += really worse than ''.join([ ... ]) for Python strings?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python string_quadratic.py --content a --filename example01.png; | |
python string_quadratic.py --content ab --filename example02.png; | |
python string_quadratic.py --content abcd --filename example03.png; | |
python string_quadratic.py --content abcdefgh --filename example04.png |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import time | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn | |
import six | |
COLORS = seaborn.husl_palette(6) | |
BLUE = COLORS[4] | |
RED = COLORS[0] | |
NUM_DOUBLES = 8 | |
def str_concat(n, content): | |
value = '' | |
for i in six.moves.xrange(n): | |
value += content | |
return value | |
def list_append_str_join(n, content): | |
values = [] | |
for i in six.moves.xrange(n): | |
values.append(content) | |
return ''.join(values) | |
def time_runs(base_n, content, concat_method): | |
n = base_n | |
n_vals = [] | |
durations = [] | |
for _ in six.moves.xrange(NUM_DOUBLES): | |
start = time.time() | |
concat_method(n, content) | |
duration = time.time() - start | |
# Keep around the values used. | |
n_vals.append(n) | |
durations.append(duration) | |
# Update at the end. | |
n *= 2 | |
return n_vals, durations | |
def get_loglog_fit(x_vals, y_vals): | |
log_x = np.log2(x_vals) | |
m, b = np.polyfit(log_x, np.log2(y_vals), 1) | |
fitted_y = 2.0**(b + m * log_x) | |
return m, fitted_y | |
def main(): | |
parser = argparse.ArgumentParser( | |
description='Show that string concat is quadratic.') | |
parser.add_argument('--base-n', dest='base_n', | |
type=int, default=8, | |
help='Base number of concatenations.') | |
parser.add_argument('--content', default='1', | |
help='Bytes to concat.') | |
parser.add_argument('--filename', | |
help='Filename to save plot in.') | |
args = parser.parse_args() | |
# First time the two methods. | |
n_vals, durations_concat = time_runs( | |
args.base_n, args.content, str_concat) | |
_, durations_append = time_runs( | |
args.base_n, args.content, list_append_str_join) | |
# Then, plot the relevant "+= / concat" lines | |
slope, fit_vals = get_loglog_fit(n_vals, durations_concat) | |
label = r'str concat ($T \propto n^{%1.3f}$)' % (slope,) | |
plt.loglog(n_vals, durations_concat, marker='o', | |
color=RED, linestyle='None', label=label) | |
plt.loglog(n_vals, fit_vals, linestyle='dashed', | |
color=RED, alpha=0.5) | |
# Then, plot the relevant "append" lines | |
slope, fit_vals = get_loglog_fit(n_vals, durations_append) | |
label = r'list append ($T \propto n^{%1.3f}$)' % (slope,) | |
plt.loglog(n_vals, durations_append, marker='o', | |
color=BLUE, linestyle='None', label=label) | |
plt.loglog(n_vals, fit_vals, linestyle='dashed', | |
color=BLUE, alpha=0.5) | |
# Add some labels to make the xticks clearer. | |
labels = [r'$%d \cdot 2^%d$' % (args.base_n, exp) | |
for exp in six.moves.xrange(NUM_DOUBLES)] | |
plt.xticks(n_vals, labels) | |
plt.ylabel('Runtime') | |
plt.xlabel('Number of Values') | |
plt.title('Concatenate n times: %r' % (args.content,)) | |
plt.legend(loc='upper left', fontsize=12) | |
if args.filename is None: | |
plt.show() | |
else: | |
plt.savefig(args.filename, bbox_inches='tight') | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment