Skip to content

Instantly share code, notes, and snippets.

@BrambleXu
Created December 25, 2019 06:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BrambleXu/24f99d2ccf228b98c90581de4aa10a81 to your computer and use it in GitHub Desktop.
Save BrambleXu/24f99d2ccf228b98c90581de4aa10a81 to your computer and use it in GitHub Desktop.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Raw tallies; keys become the 'Alias Length' column and values the
# 'Alias Counts' column of the frame built below.
st = {
    2: 1478515,
    3: 449113,
    4: 646495,
    5: 166796,
    6: 21064,
    7: 11003,
    8: 131289,
    9: 20193,
    10: 24,
    12: 4483,
    13: 1621,
    14: 2,
    16: 3064,
    17: 1454,
    18: 6,
    19: 1,
    24: 1,
}

# One row per (key, value) pair, in the dict's insertion order.
df = pd.DataFrame(
    list(st.items()),
    columns=['Alias Length', 'Alias Counts'],
)
def pareto_plot(df, x=None, y=None, title=None, show_pct_y=False, pct_format='{0:.0%}'):
    """Draw a bar chart of ``df[y]`` with its cumulative share overlaid.

    Rows are ordered by the ``x`` column (ascending) and drawn as bars on the
    left axis. The running fraction of the ``y`` total is plotted as a red
    line with markers on a twin right-hand axis, and every point on that line
    is annotated with its formatted cumulative share.

    Args:
        df: DataFrame containing both the ``x`` and ``y`` columns.
        x: Name of the column used for bar positions, sort order and the
            x-axis label. The ``None`` default is only a placeholder; a
            column name is expected.
        y: Name of the column holding the bar heights; also used as the
            left y-axis label. A column name is expected here as well.
        title: Optional plot title; nothing is set when it is falsy.
        show_pct_y: When true, the right-hand axis shows tick labels
            formatted as percentages; otherwise its ticks are removed.
        pct_format: ``str.format`` template applied to each cumulative
            fraction to produce the point annotations.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Local import: the file's top-level imports only bring in pyplot.
    from matplotlib.ticker import FuncFormatter

    ordered = df.sort_values(x)
    positions = ordered[x].values
    heights = ordered[y].values
    # Running fraction of the grand total, in the same row order as the bars.
    cum_share = (heights / heights.sum()).cumsum()

    _, ax1 = plt.subplots()
    ax1.bar(positions, heights)
    ax1.set_xlabel(x)
    ax1.set_ylabel(y)

    ax2 = ax1.twinx()
    ax2.plot(positions, cum_share, '-ro', alpha=0.5)
    ax2.set_ylabel('', color='r')
    ax2.tick_params('y', colors='r')

    if show_pct_y:
        # A formatter keeps labels tied to whatever ticks the locator picks,
        # instead of freezing label strings onto tick positions that may
        # still change before the figure is drawn.
        ax2.yaxis.set_major_formatter(
            FuncFormatter(lambda value, _pos: '{:,.2%}'.format(value))
        )
    else:
        # Hide the right-hand ticks (and therefore their labels).
        ax2.set_yticks([])

    # Label every point on the cumulative line with its share.
    for pos, share in zip(positions, cum_share):
        ax2.annotate(pct_format.format(share), (pos, share), fontweight='heavy')

    if title:
        plt.title(title)

    plt.show()
# Render the chart for the alias-length table defined earlier in this script.
pareto_plot(
    df,
    x='Alias Length',
    y='Alias Counts',
    title='Alias Pareto Distribution',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment