Skip to content

Instantly share code, notes, and snippets.

@embg
Created March 17, 2022 15:35
Show Gist options
  • Save embg/10cc75c9639382d715cf570e4cd292cc to your computer and use it in GitHub Desktop.
Save embg/10cc75c9639382d715cf570e4cd292cc to your computer and use it in GitHub Desktop.
import sys
import os
import scipy.stats
import numpy
# Substrings used to group result files: get_binary_pairs() collects, for each
# tag, the filenames that contain it and requires exactly two matches per tag.
# NOTE(review): presumably compiler identifiers embedded in the CSV file
# names -- confirm against the benchmark output naming scheme.
tags = ['gcc11', 'clang12']
def get_binary_pairs(filenames, group_tags=None):
    """Group filenames by tag and require exactly two files per tag.

    A filename belongs to a tag's group when the tag occurs anywhere in the
    name (plain substring match). Within each group the filenames keep the
    order in which they appear in ``filenames``.

    Args:
        filenames: Iterable of filename strings to classify.
        group_tags: Optional iterable of tag substrings. Defaults to the
            module-level ``tags`` list.

    Returns:
        A dict mapping each tag to the list of the two filenames containing it.

    Raises:
        ValueError: If any tag does not match exactly two filenames. An
            explicit exception is used instead of ``assert`` so the check
            still runs under ``python -O`` and reports what was found.
    """
    if group_tags is None:
        group_tags = tags
    # Materialize once so a one-shot iterable can be scanned for every tag.
    filenames = list(filenames)

    groups = {
        tag: [name for name in filenames if tag in name]
        for tag in group_tags
    }
    for tag, group in groups.items():
        if len(group) != 2:
            raise ValueError(
                f"expected exactly 2 files containing {tag!r}, "
                f"found {len(group)}: {group}"
            )
    return groups
def read_data(dirpath, filenames):
    """Load one column of float measurements from each file.

    The first line of every file is skipped (presumably a column header --
    confirm against the benchmark output format); each remaining non-blank
    line is parsed as a float.

    Args:
        dirpath: Directory containing the files.
        filenames: Iterable of file names relative to ``dirpath``.

    Returns:
        A dict mapping each file name, with a trailing ``.csv`` removed, to
        its list of float measurements, in the order of ``filenames``.

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a non-blank data line is not a valid float.
    """
    suffix = '.csv'
    data = {}
    for filename in filenames:
        # Context manager so the handle is closed even if parsing fails.
        with open(os.path.join(dirpath, filename)) as handle:
            lines = handle.read().splitlines()
        # str.rstrip('.csv') strips any trailing '.', 'c', 's', 'v' characters
        # (e.g. 'gcc11_vcs.csv' -> 'gcc11_'); remove the exact suffix instead.
        if filename.endswith(suffix):
            case = filename[:-len(suffix)]
        else:
            case = filename
        # Skip blank lines so stray empty rows do not abort the whole analysis.
        data[case] = [float(line) for line in lines[1:] if line.strip()]
    return data
def bootstrap_mean(A):
    """Estimate the mean of ``A`` with a bootstrap error bar.

    Runs ``scipy.stats.bootstrap`` with the percentile method (SciPy's default
    confidence level and resample count, since none are passed) and converts
    the resulting confidence interval into a ``(value, error)`` pair via
    ``CI_to_value_with_error``.

    Args:
        A: Sequence of numeric measurements.

    Returns:
        Tuple ``(value, error)``: the midpoint of the confidence interval and
        its half-width.
    """
    samples = [numpy.array(A)]
    result = scipy.stats.bootstrap(
        samples,
        numpy.mean,
        vectorized=False,
        method='percentile',
    )
    interval = result.confidence_interval
    return CI_to_value_with_error(interval.low, interval.high)
def bootstrap_percent_increase(A, B):
    """Estimate the percent increase of mean(A) over mean(B) with an error bar.

    The statistic ``100 * (mean(A) / mean(B) - 1)`` is bootstrapped with
    ``scipy.stats.bootstrap`` using the percentile method, and the resulting
    confidence interval is converted to ``(value, error)`` via
    ``CI_to_value_with_error``.

    Args:
        A: Sequence of numeric measurements (numerator sample).
        B: Sequence of numeric measurements (denominator sample).

    Returns:
        Tuple ``(value, error)``: the midpoint of the confidence interval of
        the percent increase and its half-width.
    """
    def percent_increase(numerator, denominator):
        # Relative change of the numerator mean against the denominator mean.
        ratio = numpy.mean(numerator) / numpy.mean(denominator)
        return 100 * (ratio - 1)

    samples = [numpy.array(A), numpy.array(B)]
    result = scipy.stats.bootstrap(
        samples,
        percent_increase,
        vectorized=False,
        method='percentile',
    )
    interval = result.confidence_interval
    return CI_to_value_with_error(interval.low, interval.high)
def CI_to_value_with_error(low, high):
    """Convert interval bounds into a ``(value, error)`` pair.

    Args:
        low: Lower bound of the interval.
        high: Upper bound of the interval.

    Returns:
        Tuple ``(midpoint, high - midpoint)``, i.e. the centre of the interval
        and the distance from the centre to the upper bound.
    """
    midpoint = (low + high) / 2
    return midpoint, high - midpoint
# def bootstrap_percent_diff_CI(A, B):
def print_analysis(dirpath, filenames):
    """Print bootstrap statistics for every tagged pair of files in a directory.

    For each tag returned by ``get_binary_pairs`` this prints the tag, then
    one line per file with its bootstrapped mean and error in MB/s, then one
    line with the bootstrapped percent increase of the first file's mean over
    the second's.

    Args:
        dirpath: Directory containing the files.
        filenames: Names of the files in ``dirpath`` to analyse.
    """
    for tag, pair_files in get_binary_pairs(filenames).items():
        print(f"\t{tag}")
        data = read_data(dirpath, pair_files)

        # Per-file throughput estimate.
        for case, measurements in data.items():
            mean, error = bootstrap_mean(measurements)
            print(f"\t\t{case}: {mean:.1f} MB/s (± {error:.1f} MB/s)")

        # Relative difference between the two files of the pair; the label
        # order follows the insertion order of ``data``.
        cases = list(data.keys())
        percent_increase, error = bootstrap_percent_increase(*data.values())
        print(f"\t\t({cases[0]} / {cases[1]}) - 1: {percent_increase:.1f}% (± {error:.1f}%)")
if __name__ == '__main__':
    # Walk the tree rooted at the first command-line argument and analyse
    # every directory that holds at least one file with '.csv' in its name.
    root = sys.argv[1]
    for dirpath, _subdirs, filenames in os.walk(root):
        has_csv = any('.csv' in filename for filename in filenames)
        if not has_csv:
            continue
        print(dirpath)
        print_analysis(dirpath, filenames)
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment