Skip to content

Instantly share code, notes, and snippets.

@aabadie
Created May 18, 2016 09:37
Show Gist options
  • Save aabadie/17e46572a8d408ea0e0098a33349e432 to your computer and use it in GitHub Desktop.
Save aabadie/17e46572a8d408ea0e0098a33349e432 to your computer and use it in GitHub Desktop.
Generate a nice plot from the results of the persistence strategies comparison script
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Script configuration: edit these module-level values to change what is
# plotted and where the figure is written.

# CSV file the benchmark results are read from.
CSV_FILE = '/tmp/comparison_results.csv'
# PNG file the generated figure is saved to.
PNG_FILE = '/tmp/comparison_results.png'
# Only CSV rows whose "dataset" column equals this value are plotted.
DATASET = 'lfw_people'
# NOTE(review): not referenced by the plotting code in this script;
# presumably shared with the benchmark script -- confirm before removing.
TRIES = 1
# When true, the figure is also displayed after being saved.
SHOW_PLOT = True
# Human-readable description of each dataset, shown at the top of the figure.
DATASET_DESC = {
    'lfw_people': 'Labeled Faces in the Wild dataset (LFW)',
    'big_array': 'Numpy array with random values (~700MB)',
    'big_dict': 'Dictionary with 1M random keys/values',
    'big_list': 'List of 10M random values',
}
###############################################################################
# Plot function
def generate_plots():
    """Generate a matplotlib figure comparing persistence strategies.

    Reads ``CSV_FILE``, keeps the rows whose ``dataset`` column equals
    ``DATASET`` and draws five side-by-side horizontal bar charts (one bar
    per ``strategy``) for the ``dump``, ``load``, ``mem_dump``, ``mem_load``
    and ``disk`` columns.  The figure is saved to ``PNG_FILE`` and, when
    ``SHOW_PLOT`` is true, displayed as well.

    Prints a message and returns without plotting when the CSV file does not
    exist or contains no row for ``DATASET``.
    """
    if not os.path.exists(CSV_FILE):
        print("CSV file doesn't exist, exiting")
        return

    df = pd.read_csv(CSV_FILE)
    # Work on an explicit copy: the strategy column is rewritten below and
    # assigning into a filtered view triggers pandas' SettingWithCopy hazard.
    df = df[df['dataset'] == DATASET].copy()
    if df.empty:
        print("Nothing to plot, exiting")
        return

    # Set up the matplotlib figure
    sns.set(style="whitegrid", context="talk")
    f, (dump_axe, load_axe, mem_dump_axe, mem_load_axe, disk_axe) = \
        plt.subplots(1, 5, figsize=(9, 4.2), sharey=True,
                     gridspec_kw=dict(wspace=.7, right=.947, bottom=.005,
                                      top=.85, left=.14))

    # Shorten the strategy names so they fit on the shared y axis.
    df['strategy'] = [_shorten_strategy_label(s) for s in df['strategy']]

    # Figure header; fall back to the raw dataset name when no description
    # is registered instead of failing with a KeyError.
    plt.text(.005, .96, '{0}'.format(DATASET_DESC.get(DATASET, DATASET)),
             size=13, transform=f.transFigure)

    # (column, axes, title, label format) for each panel.  The "s"/"MB"
    # suffixes assume times in seconds and sizes in megabytes -- the units
    # are set by the script producing the CSV.
    panels = (
        ('dump', dump_axe, "Dump time", "{0:.2G}s"),
        ('load', load_axe, "Load time", "{0:.2G}s"),
        ('mem_dump', mem_dump_axe, "Memory used\nwith dump", "{0:.0f}MB"),
        ('mem_load', mem_load_axe, "Memory used\nwith load", "{0:.0f}MB"),
        ('disk', disk_axe, "Disk used", "{0:.0f}MB"),
    )
    for column, axe, title, value_format in panels:
        _plot_metric(df, column, axe, title, value_format)
    # Kept from the original layout: the last panel has its x axis on top.
    disk_axe.xaxis.tick_top()

    sns.despine(bottom=True)
    plt.savefig(PNG_FILE, dpi=100)
    if SHOW_PLOT:
        plt.show()


def _shorten_strategy_label(label):
    """Return *label* reformatted to fit as a y-axis tick label."""
    # Break the line after the first word, abbreviate the development
    # version string and turn " -" separators into commas.
    return (label.replace(' ', '\n', 1)
                 .replace('0.10.0.dev0', 'dev')
                 .replace(' -', ', '))


def _plot_metric(df, column, axe, title, value_format):
    """Draw the per-strategy bar chart of ``df[column]`` on *axe*.

    Each bar is annotated with the mean of *column* for that strategy,
    rendered with *value_format*.
    """
    values = df[column]
    # x/y are passed by keyword: recent seaborn releases only accept
    # ``data`` positionally.
    sns.barplot(x=values, y=df['strategy'], palette="Set3", ax=axe)
    axe.set_title(title)
    axe.set_xlabel("")
    axe.set_ylabel("")

    # Small horizontal gap between the end of a bar and its label.
    offset = 0.01 * values.max()
    # sort=False keeps strategies in order of first appearance, matching the
    # order in which the bars are drawn.
    means = df.groupby('strategy', sort=False)[column].mean()
    for position, mean_value in enumerate(means):
        axe.text(mean_value + offset, position + .15,
                 value_format.format(mean_value),
                 color='black', style='italic')
    # The values are written next to the bars, so the x ticks are redundant.
    axe.set_xticks(())
# Script entry point: build the figure only when run directly, not on import.
if __name__ == "__main__":
    generate_plots()
@aabadie
Copy link
Author

aabadie commented May 18, 2016

How to use this script

  1. Generate bench results with this gist
  2. Run this script:
python strategies_comparison_plot.py

comparison_results

Notes

You can tweak the global variables at the top of the script (CSV_FILE, PNG_FILE, DATASET, SHOW_PLOT) to change its behaviour.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment