Skip to content

Instantly share code, notes, and snippets.

@jquacinella
Last active February 3, 2023 15:51
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jquacinella/1a6341f0f1446973714c to your computer and use it in GitHub Desktop.
Save jquacinella/1a6341f0f1446973714c to your computer and use it in GitHub Desktop.
Violin Plots for Weighted Data in Matplotlib
import numpy as np
import statsmodels.api as sm
import weighted
from matplotlib.cbook import violin_stats
from scipy import stats
def vdensity_with_weights(weights):
    """Build a weighted density function usable by ``violin_stats``.

    ``matplotlib.cbook.violin_stats`` calls its density callback with only
    ``(data, coords)``, so the weights are captured in a closure instead of
    being passed on each call.

    Parameters
    ----------
    weights : array-like
        One weight per observation, in the same order as the data that will
        later be handed to the returned function.

    Returns
    -------
    callable
        ``vdensity(data, coords)``: fits a weighted statsmodels
        ``KDEUnivariate`` to ``data`` and returns the density evaluated at
        ``coords``.
    """
    # Coerce once, up front, so lists, tuples and pandas Series all reach
    # statsmodels as the same plain float array on every call.
    weight_arr = np.asarray(weights, dtype=float)

    def vdensity(data, coords):
        """Return the weighted KDE of ``data`` evaluated at ``coords``.

        Raises
        ------
        ValueError
            If the number of observations differs from the number of weights
            captured by the enclosing function.
        """
        # Fail early with a clear message rather than deep inside the KDE fit.
        if np.size(data) != weight_arr.size:
            raise ValueError(
                "data and weights must have the same number of elements "
                "(got %d and %d)" % (np.size(data), weight_arr.size)
            )
        kde = sm.nonparametric.KDEUnivariate(data)
        # fft=False is required here: the FFT code path in statsmodels does
        # not accept observation weights.
        kde.fit(fft=False, weights=weight_arr)
        # y-values of the violin outline at the requested coordinates.
        return kde.evaluate(coords)

    return vdensity
def custom_violin_stats(data, weights):
    """Compute violin-plot statistics for weighted observations.

    Parameters
    ----------
    data : array-like
        Observations for a single violin.
    weights : array-like
        One weight per observation, in the same order as ``data``.

    Returns
    -------
    list of dict
        The structure produced by ``matplotlib.cbook.violin_stats`` (suitable
        for ``Axes.violin``), with the first entry's ``"mean"`` and
        ``"median"`` replaced by their weighted counterparts.
    """
    # Weighted median comes from the `weighted` module; the weighted mean
    # from numpy. Only the mean is needed, so the sum of weights is not
    # requested.
    median = weighted.quantile_1D(data, weights, 0.5)
    mean = np.ma.average(data, weights=weights)

    # violin_stats expects a callable taking (data, coords); the closure
    # returned by vdensity_with_weights carries the weights for it.
    results = violin_stats(data, vdensity_with_weights(weights))

    # Overwrite the unweighted mean/median computed by violin_stats. Only the
    # first dataset is updated; min/max are left as violin_stats set them,
    # since weighting does not change the extrema.
    results[0][u"mean"] = mean
    results[0][u"median"] = median
    return results
### Example
#vpstats1 = custom_violin_stats(np.asarray(df_column_data), np.asarray(df_column_weights))
#vplot = ax.violin(vpstats1, [pos_idx], vert=False, showmeans=True, showextrema=True, showmedians=True)
#current_color_palette = ...
#for pc in vplot['bodies']:
# pc.set_facecolor(current_color_palette[pos_idx])
# pc.set_edgecolor('black')
@Gillu13
Copy link

Gillu13 commented Jan 13, 2017

Hello!

It looks like you use the np module here (I assume it is an alias for numpy), but you never import it. Am I right?

@MaxGhenis
Copy link

Thanks for this! I added a couple pieces to make this fully runnable, you can see it here: https://colab.research.google.com/drive/1cSnJGKJEqbllkPbF2z0cnfdwT40sUKKR

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment