Created
November 16, 2019 08:17
-
-
Save pomidoroshev/219ee87b1a138b869c9ce429a2592688 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! -*- coding: utf-8 -*- | |
import csv | |
import io | |
import itertools | |
import functools | |
import math | |
import sys | |
import timeit | |
# Choose the in-memory buffer class that accepts the interpreter's native
# ``str`` type: ``io.BytesIO`` on Python 2, ``io.StringIO`` on Python 3.
StringIO = io.BytesIO if sys.version_info < (3,) else io.StringIO
def string_join(n_components, s):
    """Collect every formatted piece in a list, then join them in one pass.

    Returns the concatenation of ``"<i>, <s>"`` for each ``i`` in
    ``range(n_components)``.
    """
    # The explicit append loop is the technique under measurement; it is
    # intentionally not collapsed into a comprehension.
    pieces = []
    for index in range(n_components):
        piece = "%d, %s" % (index, s)
        pieces.append(piece)
    joined = "".join(pieces)
    return joined
def string_buffer(n_components, s):
    """Write each formatted piece into an in-memory stream and return its contents.

    Returns the concatenation of ``"<i>, <s>"`` for each ``i`` in
    ``range(n_components)``.
    """
    stream = StringIO()
    for index in range(n_components):
        stream.write("%d, %s" % (index, s))
    # getvalue() yields the whole buffer regardless of the stream position.
    return stream.getvalue()
def string_concat(n_components, s):
    """Grow the result one piece at a time using in-place ``+=``.

    Returns the concatenation of ``"<i>, <s>"`` for each ``i`` in
    ``range(n_components)``.
    """
    # The repeated += is the technique under measurement, so it stays as is.
    result = ""
    for index in range(n_components):
        result += "%d, %s" % (index, s)
    return result
if sys.version_info >= (3, 4):
    from statistics import mean, stdev
else:
    # The ``statistics`` module is unavailable here, so provide simplified
    # stand-ins for the two functions this script uses.
    def mean(values):
        """Return the arithmetic mean of ``values``."""
        return sum(values) / len(values)

    def stdev(values, xbar=None):
        """Return the sample standard deviation of ``values``.

        ``xbar`` is the already-computed mean; it is calculated from
        ``values`` when omitted. The sum of squared deviations is divided
        by ``len(values) - 1``.
        """
        count = len(values)
        if xbar is None:
            xbar = mean(values)
        squared_deviations = sum(((x - xbar) ** 2 for x in values))
        return math.sqrt(squared_deviations / (count - 1))
def _format_truncated(value, truncation):
    """Round ``value`` to ``truncation`` decimal places and format it.

    A negative ``truncation`` rounds to the left of the decimal point
    (``-1`` rounds to the nearest ten); the result is then rendered with no
    decimal places.

    >>> _format_truncated(13.14, 1)
    '13.1'
    >>> _format_truncated(11.9344, -1)
    '10'
    """
    rounded = round(value, truncation)
    # A format precision cannot be negative, so clamp it at zero.
    decimals = max(truncation, 0)
    return format(rounded, "0.%df" % decimals)  # "0.1f" -> "13.1", "0.0f" -> "13"


def _format_mean_std(mean_val, std_val):
    """Format a mean and standard deviation with appropriate significant figures.

    The standard deviation is reported to 1 significant figure, and the mean
    is rounded so that its lowest reported digit lines up with that figure:

    >>> _format_mean_std(1.24, 0.3)
    '1.2 (± 0.3)'
    >>> _format_mean_std(5.7883, 11.9344)
    '6 (± 10)'
    >>> _format_mean_std(5.7883, 0.02455)
    '5.79 (± 0.02)'

    When the standard deviation is at least as large as the mean, the mean
    still keeps one significant figure instead of being rounded away.

    Raises:
        ValueError: if ``std_val`` is not strictly positive (its order of
            magnitude is undefined in that case).
    """
    # ``not x > 0`` also rejects NaN, which ``x <= 0`` would let through.
    if not std_val > 0:
        raise ValueError("std_val must be positive, got %r" % (std_val,))

    # Given 1.24 ± 0.3 we want 1.2: digits more than an order of magnitude
    # below the standard deviation are suspect.  log10 is used rather than
    # math.log(x, 10) because the latter is inexact at powers of ten
    # (math.log(1000, 10) == 2.9999999999999996), which would put the
    # rounding position one digit off.
    std_truncation = -int(math.floor(math.log10(std_val)))
    mean_truncation = std_truncation

    # Don't round the mean too far: if the stdev is >= the mean, keep one
    # significant figure of the mean.  A mean of exactly zero has no
    # magnitude, so the stdev-derived position is used unchanged.
    if mean_val != 0:
        mean_round_mag = -math.log10(abs(mean_val))
        if mean_round_mag > mean_truncation:
            # int() keeps round() happy on Python 2, where ceil returns float.
            mean_truncation = int(math.ceil(mean_round_mag))

    mean_str = _format_truncated(mean_val, mean_truncation)
    std_str = _format_truncated(std_val, std_truncation)
    return "%s (± %s)" % (mean_str, std_str)
def _time_with_std(timer, number, k=5):
    """Measure per-execution time in microseconds, with its spread.

    Calls ``timer.timeit(number=number)`` ``k`` times, converts each total
    to microseconds per execution, and returns a ``(mean, stdev)`` tuple
    computed over those ``k`` samples.
    """
    # timeit reports total seconds for ``number`` executions.
    microseconds_per_run = 1e6 / number
    samples = [
        timer.timeit(number=number) * microseconds_per_run for _ in range(k)
    ]
    average = mean(samples)
    spread = stdev(samples, xbar=average)
    return average, spread
def main(csv_mode):
    """Benchmark the three string-building functions and print the results.

    For each component count, every function is timed with ``_time_with_std``
    (11 samples) and the result formatted with ``_format_mean_std``.  Rows
    are written to stdout as CSV when ``csv_mode`` is true, otherwise as an
    aligned text table.
    """
    setup = "from __main__ import string_join, string_concat, string_buffer"
    # One statement template per table column, in column order.
    statement_templates = (
        "string_join(%d, 'foobar')",
        "string_buffer(%d, 'foobar')",
        "string_concat(%d, 'foobar')",
    )

    rows = []
    for n in [10, 100, 1000, 10000]:
        # Fewer repetitions for larger inputs, but always at least one.
        number = max(100000 // n, 1)
        row = [n]
        for template in statement_templates:
            timer = timeit.Timer(template % n, setup=setup)
            time_mean, time_std = _time_with_std(timer, number=number, k=11)
            row.append(_format_mean_std(time_mean, time_std))
        rows.append(tuple(row))

    if csv_mode:
        csv.writer(sys.stdout).writerows(rows)
        return

    header = "{:^20} | {:^20} | {:^20} | {:^20}".format(
        "# components", "join_time (μs)", "buffer_time (μs)", "concat_time (μs)"
    )
    print(header)
    print("-" * len(header))
    for row in rows:
        print("{:<20} | {:^20} | {:^20} | {:^20}".format(*row))
if __name__ == "__main__":
    # A full argparse setup is overkill here: CSV output is selected only
    # when the sole command-line argument is exactly "--csv".
    csv_mode = sys.argv[1:] == ["--csv"]
    main(csv_mode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment