Skip to content

Instantly share code, notes, and snippets.

@pierdom
Last active January 19, 2022 10:30
Show Gist options
  • Save pierdom/235278048ab7127b64c1f87ca7297c87 to your computer and use it in GitHub Desktop.
Save pierdom/235278048ab7127b64c1f87ca7297c87 to your computer and use it in GitHub Desktop.
[Find best fitting distributions] Find the best fitting PDFs (power distribution functions) from a list of well-known distributions in scipy. Inspired by this: https://stackoverflow.com/questions/6620471/fitting-empirical-distribution-to-theoretical-ones-with-scipy-python #datascience #python #matplotlib #visualization #statistics
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# import matplotlib and set inline
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
%matplotlib inline
# import other libraries
import scipy.stats as st
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
# set seaborn style
sns.set_style("whitegrid")
# my distribution
mydistr = ...
# X axis with custom binning (change here, depending on what you need)
mydistr_x = np.linspace(mydistr.min(),mydistr.max(),int(mydistr.max()))
# define a set of distributions to check
distribution_list = [
st.alpha,st.anglit,st.arcsine,st.beta,st.betaprime,st.bradford,st.burr,st.cauchy,st.chi,st.chi2,st.cosine,
st.dgamma,st.dweibull,st.erlang,st.expon,st.exponnorm,st.exponweib,st.exponpow,st.f,st.fatiguelife,st.fisk,
st.foldcauchy,st.foldnorm,st.frechet_r,st.frechet_l,st.genlogistic,st.genpareto,st.gennorm,st.genexpon,
st.genextreme,st.gausshyper,st.gamma,st.gengamma,st.genhalflogistic,st.gilbrat,st.gompertz,st.gumbel_r,
st.gumbel_l,st.halfcauchy,st.halflogistic,st.halfnorm,st.halfgennorm,st.hypsecant,st.invgamma,st.invgauss,
st.invweibull,st.johnsonsb,st.johnsonsu,st.ksone,st.kstwobign,st.laplace,st.levy,st.levy_l,st.levy_stable,
st.logistic,st.loggamma,st.loglaplace,st.lognorm,st.lomax,st.maxwell,st.mielke,st.nakagami,st.ncx2,st.ncf,
st.nct,st.norm,st.pareto,st.pearson3,st.powerlaw,st.powerlognorm,st.powernorm,st.rdist,st.reciprocal,
st.rayleigh,st.rice,st.recipinvgauss,st.semicircular,st.t,st.triang,st.truncexpon,st.truncnorm,st.tukeylambda,
st.uniform,st.vonmises,st.vonmises_line,st.wald,st.weibull_min,st.weibull_max,st.wrapcauchy
]
# this list will contain the result of the fittings
distribution_fitting = []
# fit every distribution in the list above
for ref_distr in distribution_list:
# pick distribution name
distr_name = type(ref_distr).__name__.split("_")[0]
try:
# ignore warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# get parameters from the best fit
params = ref_distr.fit(mydistr)
arg = params[:-2]
loc = params[-2]
scale = params[-1]
# build the PDF from previous parameters
pdf_fitted = ref_distr.pdf(mydistr_x,loc=loc,scale=scale,*arg)
# calculate maximum likelihood estimator
mle = ref_distr.nnlf(params, mydistr)
# as an alternative: sum of square error
sse = np.sum(np.power(mydistr_x - pdf_fitted, 2.0))
# add results to list
distribution_fitting.append({
"distr_name": distr_name, "mle": mle, "sse":sse,
"pdf_fitted": pdf_fitted, "params": params})
# ignore distributions that could not be fitted
except Exception:
print("Discarded function: {}".format(distr_name))
# plot data histograms
fig, ax = plt.subplots(1,1,dpi=120)
_ = ax.hist(mydistr, mydistr_x, normed=True, color="gray", alpha=0.4)
# set stuff for this axis
ax.set_prop_cycle('color',plt.cm.rainbow(np.linspace(0,1,top)))
ax.set_title("Best fitting PDFs")
# just print TOP distributions according to maxim. likelihood estimator
top = 5
sort_by = "mle" # or sse
for d in sorted(distribution_fitting, key=lambda k: k[sort_by])[:top]:
distr_name = d['distr_name']
mle = d['mle']
pdf_fitted = d['pdf_fitted']
params = d['params']
label = "{} [{:.2f}]".format(distr_name,mle)
_ = ax.plot(mydistr_x, pdf_fitted, label=label)
_ = ax.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment