Is += really worse than ''.join([ ... ]) for Python strings?
python string_quadratic.py --content a --filename example01.png; | |
python string_quadratic.py --content ab --filename example02.png; | |
python string_quadratic.py --content abcd --filename example03.png; | |
python string_quadratic.py --content abcdefgh --filename example04.png |
import argparse | |
import time | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn | |
import six | |
COLORS = seaborn.husl_palette(6) | |
BLUE = COLORS[4] | |
RED = COLORS[0] | |
NUM_DOUBLES = 8 | |
def str_concat(n, content): | |
value = '' | |
for i in six.moves.xrange(n): | |
value += content | |
return value | |
def list_append_str_join(n, content): | |
values = [] | |
for i in six.moves.xrange(n): | |
values.append(content) | |
return ''.join(values) | |
def time_runs(base_n, content, concat_method): | |
n = base_n | |
n_vals = [] | |
durations = [] | |
for _ in six.moves.xrange(NUM_DOUBLES): | |
start = time.time() | |
concat_method(n, content) | |
duration = time.time() - start | |
# Keep around the values used. | |
n_vals.append(n) | |
durations.append(duration) | |
# Update at the end. | |
n *= 2 | |
return n_vals, durations | |
def get_loglog_fit(x_vals, y_vals): | |
log_x = np.log2(x_vals) | |
m, b = np.polyfit(log_x, np.log2(y_vals), 1) | |
fitted_y = 2.0**(b + m * log_x) | |
return m, fitted_y | |
def main(): | |
parser = argparse.ArgumentParser( | |
description='Show that string concat is quadratic.') | |
parser.add_argument('--base-n', dest='base_n', | |
type=int, default=8, | |
help='Base number of concatenations.') | |
parser.add_argument('--content', default='1', | |
help='Bytes to concat.') | |
parser.add_argument('--filename', | |
help='Filename to save plot in.') | |
args = parser.parse_args() | |
# First time the two methods. | |
n_vals, durations_concat = time_runs( | |
args.base_n, args.content, str_concat) | |
_, durations_append = time_runs( | |
args.base_n, args.content, list_append_str_join) | |
# Then, plot the relevant "+= / concat" lines | |
slope, fit_vals = get_loglog_fit(n_vals, durations_concat) | |
label = r'str concat ($T \propto n^{%1.3f}$)' % (slope,) | |
plt.loglog(n_vals, durations_concat, marker='o', | |
color=RED, linestyle='None', label=label) | |
plt.loglog(n_vals, fit_vals, linestyle='dashed', | |
color=RED, alpha=0.5) | |
# Then, plot the relevant "append" lines | |
slope, fit_vals = get_loglog_fit(n_vals, durations_append) | |
label = r'list append ($T \propto n^{%1.3f}$)' % (slope,) | |
plt.loglog(n_vals, durations_append, marker='o', | |
color=BLUE, linestyle='None', label=label) | |
plt.loglog(n_vals, fit_vals, linestyle='dashed', | |
color=BLUE, alpha=0.5) | |
# Add some labels to make the xticks clearer. | |
labels = [r'$%d \cdot 2^%d$' % (args.base_n, exp) | |
for exp in six.moves.xrange(NUM_DOUBLES)] | |
plt.xticks(n_vals, labels) | |
plt.ylabel('Runtime') | |
plt.xlabel('Number of Values') | |
plt.title('Concatenate n times: %r' % (args.content,)) | |
plt.legend(loc='upper left', fontsize=12) | |
if args.filename is None: | |
plt.show() | |
else: | |
plt.savefig(args.filename, bbox_inches='tight') | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment