Skip to content

Instantly share code, notes, and snippets.

@Thuruv
Forked from jesseh/pareto.py
Created July 28, 2016 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Thuruv/8cb6a83dc0b555e9a220e977c36e2995 to your computer and use it in GitHub Desktop.
Save Thuruv/8cb6a83dc0b555e9a220e977c36e2995 to your computer and use it in GitHub Desktop.
Pareto Chart with Matplotlib
import pandas as pd
import matplotlib.pyplot as plt
def combined_label(perc, tot):
"""
Format a label to include by Euros and %.
"""
return "{0:,.0f}k EUR, {1:.0f}%".format(perc * tot / 1000, perc * 100)
def cost_cum(data, focus, subject):
"""
Accumulate the stats.
- data is a DataFrame,
- focus is the colum to group by
- subject is the column to aggregate.
"""
# Setup data frame
parts = data[[focus, 'cost']].groupby(focus).sum().sort(subject, ascending=False)
parts['percent'] = parts['cost'] / parts.cost.sum()
parts['cum_percent'] = parts['percent'].cumsum()
return parts
def cost_pareto(data, focus_name, limit_percent = 0.75):
# Filter and organize the data frame
top_parts = data[data['cum_percent'] < limit_percent]
top_parts.set_index(top_parts['percent'])
# Draw the plots
fig = plt.figure(figsize=(10,7))
fig.subplots_adjust(bottom=0.4, left=0.15)
ax = fig.add_subplot(1,1,1)
top_parts['cum_percent'].plot(ax=ax, color="k", drawstyle="steps-post")
top_parts['percent'].plot(ax=ax, kind="bar", color="k", alpha=0.5)
ax.set_ylim(bottom=0, top=1)
tick_nums = [x/float(100) for x in range(0,101,20)]
ax.set_yticks(tick_nums)
tot_cost = top_parts['cost'].sum()
ax.set_yticklabels([combined_label(x, tot_cost) for x in tick_nums])
ax.set_title("Top %s%% of Cost Split By %s" % (int(limit_percent * 100), focus))
ax.set_xlabel("")
return ax
accumulated = cost_cum(data, 'Part', 'Cost')
chart = cost_pareto(accumulated, 'Part', 0.9)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment