Skip to content

Instantly share code, notes, and snippets.

@camriddell
Last active March 7, 2023 00:51
Show Gist options
  • Save camriddell/c5de7372fd57ee36d7e8448d66cea027 to your computer and use it in GitHub Desktop.
Save camriddell/c5de7372fd57ee36d7e8448d66cea027 to your computer and use it in GitHub Desktop.
from scipy.stats import norm, uniform, gamma
from matplotlib.pyplot import rc, rcdefaults, subplots, imread, show
from matplotlib.lines import Line2D
from matplotlib.ticker import MultipleLocator
from matplotlib.transforms import blended_transform_factory
from matplotlib.offsetbox import VPacker, TextArea, AnchoredOffsetbox, DrawingArea
from matplotlib.image import BboxImage
from numpy.random import default_rng
from numpy import arange, linspace, broadcast_to
rcdefaults()
rc('figure', facecolor='white')
rc('font', size=16)
## All populations should have central tendency ~70,
# keeps things comparable across distributions
populations = [
norm(loc=70, scale=3), uniform(60, 20), gamma(1.99, loc=65, scale=2)
]
fig, axes = subplots(
3, len(populations), figsize=(20, 16),
sharex=True, sharey='row',
gridspec_kw={
'height_ratios': [1, 5, 1], 'hspace': 0, 'wspace': .1,
'bottom': .08, 'right': .9, 'left': .3, 'top': .9
},
dpi=104
)
for pop, ax in zip(populations, axes[0]):
xs = linspace(*pop.ppf([.001, .999]), 4_000)
ax.fill_between(xs, pop.pdf(xs), alpha=.5, label='Distribution')
ax.axvline(
pop.mean(), 0, .95, ls='dashed', color='k', lw=2, label=r'Mean $\mu$'
)
ax.set_title(pop.dist.name.title(), fontsize='large')
ax.xaxis.set_tick_params(length=0)
ax.yaxis.set_tick_params(labelleft=False, length=0)
ax.margins(y=0)
### Running & Displaying the Simulation
rng = default_rng(0)
n_samples, sample_size = 100, 25
samples = [
p.rvs(size=(n_samples, sample_size), random_state=rng)
for p in populations
]
ys = arange(n_samples)
scatter_ys = broadcast_to(ys[:, None], (n_samples, sample_size))
rng = default_rng(0)
for s, samp_ax in zip(samples, axes[1, :]):
smean, sdev = s.mean(axis=1), s.std(axis=1)
samp_ax.scatter(
s, y=scatter_ys, s=4, c='tab:blue', alpha=.7, label='Observations'
)
samp_ax.fill_betweenx(
ys, x1=smean - sdev, x2=smean + sdev, color='tab:orange',
alpha=.3, label='Std. Dev. $S_x$'
)
samp_ax.plot(smean, ys, color='tab:orange', label=r'Mean $\bar{x}$')
samp_ax.xaxis.set_tick_params(length=0)
### Creating the Sampling Distributions
for s, mean_ax in zip(samples, axes[2, :]):
smean, sdev = s.mean(axis=1), s.std(axis=1)
mean_ax.axvline(
smean.mean(), ymin=.5, ymax=.9, c='k', ls='dashed',
lw=2, label='Est. Pop. Mean $\hat{\mu}$'
)
norm_density = norm(*norm.fit(smean))
xs = linspace(*norm_density.ppf([.001, .999]), 4000)
mean_ax.fill_between(
xs, -norm_density.pdf(xs), label='Fitted Gaussian',
alpha=.5, color='tab:orange'
)
mean_ax.hist(
smean, bins='auto', label=r'Sample Means $\bar{x}$',
density=True, color='tab:orange', ec='white'
)
mean_ax.yaxis.set_visible(False)
mean_ax.spines['bottom'].set_position('zero')
### Cleaning Aesthetics
# Set custom ticks and limits to the Simulation & Sampling Dist. Axes
# The y-axes are shared across rows of the figure,
# so we only need to invert 1 y-axis out of the row of sample Axes
samp_ax = axes[1, 0]
samp_ax.yaxis.set_major_locator(MultipleLocator(10))
samp_ax.yaxis.set_major_formatter('Simulation {x:.0f}')
samp_ax.invert_yaxis()
# samp_ax.set_ylabel('Simulation', size='large', rotation=-90, va='top')
# Manually set the xlimits, they're shared across all Axes
# the population means hover ~70, so we drop that xtick for visibility
mean_ax = axes[2, 0]
mean_ax.set(xlim=(60, 80), xticks=[60, 65, 75, 80])
mean_ax.invert_yaxis() # flip the histgram and KDE, so the KDE is on top
mean_ax.margins(y=0.1)
### Adding Text Annotations & Legends
## Create left-aligned titles for each row
axes_titles = [
('Population', ''),
('Samples', f'n = {sample_size}'),
('Sampling Distribution', 'of the sample mean')
]
for (title, subtitle), ax in zip(axes_titles, axes[:, 0], strict=True):
titlebox = [
TextArea(title, textprops={'size': 'large', 'weight': 'semibold'})
]
if subtitle:
titlebox.append(TextArea(subtitle, textprops={'style': 'italic'}))
title_packer = VPacker(pad=0, sep=5, children=titlebox)
legend = fig.legend(
*ax.get_legend_handles_labels(), markerscale=4, scatterpoints=4
)
legend.remove()
# Legends are composed of two children: VPacker & FancyBboxPatch
# We can extract the VPacker and add it to our own for a very custom title
legend_body, _ = legend.get_children()
transform = blended_transform_factory(fig.transFigure, ax.transAxes)
fig.add_artist(
AnchoredOffsetbox(
loc='upper left',
child=VPacker(
align='left', pad=10, sep=10,
children=[title_packer, legend_body]
),
bbox_to_anchor=(0.05, 1), bbox_transform=transform,
borderpad=0, frameon=False
)
)
## Add lines to separate rows of plots
for ax in axes[1:, 0]:
transform = blended_transform_factory(fig.transFigure, ax.transAxes)
fig.add_artist(
Line2D([.05, .95], [1, 1], color='lightgray', transform=transform)
)
for ax in axes[:, 1:].flat:
ax.yaxis.set_tick_params(left=False)
# Remove the spines
for ax in fig.axes:
sides = ['bottom', 'left', 'top', 'right']
if ax in axes[-1, :]:
sides.remove('bottom')
ax.spines[sides].set_visible(False)
## Figure title - VPacker with TextAreas enables great control
# over alignment & different fonts
figure_title = VPacker(
align='center', pad=0, sep=5,
children=[
TextArea(
'Visualizing the Central Limit Theorem',
textprops={'size': 'x-large', 'weight': 'bold'}
),
TextArea(
'Sampling Distributions of the Sample Mean',
textprops={'size': 'large', 'style': 'italic'}
)
])
fig.add_artist(
AnchoredOffsetbox(
loc='upper center', child=figure_title,
bbox_to_anchor=(0.5, 1.0), bbox_transform=fig.transFigure,
frameon=False
)
)
### Author Watermark
fig.text(.98, .02, s='Cameron Riddell @RiddleMeCam', ha='right', color='lightgray')
show()
# fig.savefig('clt.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment