Skip to content

Instantly share code, notes, and snippets.

@aflansburg
Last active July 14, 2021 18:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflansburg/87b7a9583e0a89cebc4a26b223a81f99 to your computer and use it in GitHub Desktop.
Save aflansburg/87b7a9583e0a89cebc4a26b223a81f99 to your computer and use it in GitHub Desktop.
Dual histogram + boxplot with KDE for univariate analysis + Mean & Median Lines
# import libs
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# this function creates a combined boxplot + histogram plot using Seaborn's JointGrid
# type annotations are included as hints for future users
def dual_plot(series: pd.Series, figsize: tuple = (16, 8),
              bins: int = None, return_plot: bool = False,
              color: str = "pastel", xlim: tuple = None, tick_m: int = None,
              add_lbl: str = None) -> "sns.JointGrid | None":
    """Draw a histogram with KDE topped by a boxplot for a single feature.

    Vertical lines mark the mean (solid red) and the median (dashed blue)
    on both the histogram and the boxplot axes.

    Parameters:
        series: the feature to plot; its ``name`` is used as the plot title.
        figsize: ``(width, height)`` of the figure in inches.
        bins: forwarded to ``sns.histplot`` as ``bins``; when ``None`` the
            seaborn default binning is used.
        return_plot: return the ``JointGrid`` if true, otherwise show the
            plot with ``matplotlib.pyplot.show()``.
        color: seaborn color palette to use for the plot.
        xlim: ``(left, right)`` limits of the x axis; ``None`` keeps the
            automatic limits.
        tick_m: when set, place x ticks at the multiples of ``tick_m``
            nearest to the observed values (useful for integer data).
        add_lbl: additional text to append to the x-label.

    Returns:
        The ``JointGrid`` when ``return_plot`` is true, otherwise ``None``
        after the plot has been shown.
    """
    # NOTE: set_palette changes seaborn's palette globally, not just for
    # this figure.
    sns.set_palette(color)

    # JointGrid only accepts a height and builds a square figure, so the
    # requested width is applied to the figure afterwards.
    grid = sns.JointGrid(x=series, height=figsize[1])
    ax_joint = grid.ax_joint
    ax_joint.figure.set_size_inches(figsize[0], figsize[1])

    # Only override seaborn's binning when the caller asked for it.
    hist_kwargs = {"kde": True}
    if bins is not None:
        hist_kwargs["bins"] = bins
    grid.plot_joint(sns.histplot, **hist_kwargs)

    # Title with some padding so it clears the marginal boxplot.
    ax_joint.set_title(label=series.name, fontdict={'fontsize': 20}, pad=16)

    if add_lbl:
        ax_joint.set_xlabel(ax_joint.get_xlabel() + ' ' + add_lbl)

    if xlim is not None:
        ax_joint.set_xlim(xlim)

    if tick_m:
        # Snap every observed value to its nearest multiple of tick_m;
        # missing values are dropped because they cannot be rounded.
        observed = series.dropna().to_numpy()
        x_ticks = np.unique(tick_m * np.round(observed / tick_m))
        ax_joint.set_xticks(x_ticks)

    # Use the pandas reductions so missing values are skipped for both
    # statistics (np.median would yield NaN and draw no line).
    mean_value = series.mean()
    median_value = series.median()
    for axis in (grid.ax_joint, grid.ax_marg_x):
        axis.axvline(mean_value, color='r', linestyle='-')
        axis.axvline(median_value, color='b', linestyle='--')

    # Add the boxplot on the marginal axes.
    grid.plot_marginals(sns.boxplot)

    if return_plot:
        return grid
    plt.show()
    return None
# Example usage. `data` is not defined in this snippet; presumably it is a
# pandas DataFrame with an 'Income' column loaded elsewhere - confirm.
dual_plot(
    series=data['Income'],
    color='colorblind',
    add_lbl="(in thousands - USD)",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment