Skip to content

Instantly share code, notes, and snippets.

@jquacinella
Last active February 3, 2023 15:51
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jquacinella/1a6341f0f1446973714c to your computer and use it in GitHub Desktop.
Save jquacinella/1a6341f0f1446973714c to your computer and use it in GitHub Desktop.
Violin Plots for Weighted Data in Matplotlib
import numpy as np
import statsmodels.api as sm
import weighted
from matplotlib.cbook import violin_stats
from scipy import stats
def vdensity_with_weights(weights):
    """Build a weighted density callback for matplotlib's ``violin_stats``.

    ``violin_stats`` calls its density method with ``(data, coords)`` only,
    so the weights are captured in a closure instead of being passed as an
    argument.
    """

    def vdensity(data, coords):
        """Return the weighted KDE of ``data`` evaluated at ``coords``."""
        # The weights come from the enclosing scope; the fit is done with
        # fft=False, as in the original call that supplies weights.
        kde = sm.nonparametric.KDEUnivariate(data)
        kde.fit(weights=weights, fft=False)

        # y-values of the density curve at the requested coordinates.
        return kde.evaluate(coords)

    return vdensity
def custom_violin_stats(data, weights):
    """Compute violin-plot statistics for weighted data.

    Runs matplotlib's ``violin_stats`` with a weighted KDE as the density
    method, then replaces the mean and median of the first result entry
    with their weighted counterparts.

    Args:
        data: Sample values; handed as-is to ``weighted.quantile_1D``,
            ``np.ma.average`` and ``violin_stats``.
        weights: Per-sample weights, one per entry of ``data``.

    Returns:
        The value returned by ``violin_stats``, with ``results[0]["mean"]``
        and ``results[0]["median"]`` overwritten by the weighted mean and
        weighted median. Only the first entry is updated.
    """
    # Weighted median comes from the ``weighted`` module (0.5 quantile).
    median = weighted.quantile_1D(data, weights, 0.5)
    # Only the average is needed, so the sum of weights is not requested.
    mean = np.ma.average(data, weights=list(weights))

    # violin_stats expects a callable taking (data, coords); the closure
    # carries the weights into that callable.
    results = violin_stats(data, vdensity_with_weights(weights))

    # Overwrite the unweighted statistics with the weighted ones. The
    # "min" and "max" entries are left as computed by violin_stats.
    results[0][u"mean"] = mean
    results[0][u"median"] = median
    return results
### Example
# vpstats1 = custom_violin_stats(np.asarray(df_column_data), np.asarray(df_column_weights))
# vplot = ax.violin(vpstats1, [pos_idx], vert=False, showmeans=True, showextrema=True, showmedians=True)
# current_color_palette = ...
# for pc in vplot['bodies']:
#     pc.set_facecolor(current_color_palette[pos_idx])
#     pc.set_edgecolor('black')
@MaxGhenis
Copy link

Thanks for this! I added a couple pieces to make this fully runnable, you can see it here: https://colab.research.google.com/drive/1cSnJGKJEqbllkPbF2z0cnfdwT40sUKKR

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment