Skip to content

Instantly share code, notes, and snippets.

@larsbratholm
Last active June 21, 2018 11:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save larsbratholm/a60eefb2411b373a965e11405f132eb8 to your computer and use it in GitHub Desktop.
Save larsbratholm/a60eefb2411b373a965e11405f132eb8 to your computer and use it in GitHub Desktop.
# This code is heavily based on a blog post by Andre Dietrich, currently
# found at http://www.aizac.info/simple-check-of-a-sample-against-80-distributions/
from __future__ import print_function
import scipy.stats
import warnings
import numpy as np
import sys
import operator
# just for suppressing warnings
warnings.simplefilter('ignore')


def corrected_aic(negative_log_likelihood, n_samples, n_parameters):
    """Return the small-sample corrected Akaike information criterion (AICc).

    Uses the same expression as the original script,
    ``2*n*k / (n - k - 1) + 2*NLL``, but with float arithmetic so the
    correction term is not floored under Python 2.

    Args:
        negative_log_likelihood: negative log-likelihood of the fitted model.
        n_samples: number of data points (n).
        n_parameters: number of fitted parameters (k).

    Returns:
        The AICc as a float, or NaN when ``n - k - 1 <= 0`` (the correction
        term would divide by zero or flip sign there).
    """
    denominator = n_samples - n_parameters - 1
    if denominator <= 0:
        return float("nan")
    penalty = 2.0 * n_samples * n_parameters / denominator
    return penalty + 2.0 * negative_log_likelihood


def rank_distributions(data, names=None):
    """Fit scipy.stats continuous distributions to ``data`` and rank by AICc.

    Args:
        data: array-like sample to fit.
        names: optional iterable of attribute names in ``scipy.stats`` to
            try; defaults to every name in ``dir(scipy.stats)``.

    Returns:
        A list of ``(name, aicc)`` tuples sorted by ascending AICc. Names that
        are not ``rv_continuous`` instances, fits that raise, and non-finite
        scores are left out.
    """
    data = np.asarray(data)
    if names is None:
        names = dir(scipy.stats)

    scores = {}
    for name in names:
        # Plain attribute lookup instead of eval() on a built-up string.
        candidate = getattr(scipy.stats, name, None)
        # Only continuous distribution objects are fitted; other callables in
        # scipy.stats (tests, classes, multivariate objects) are skipped
        # rather than probed by calling .fit on them.
        if not isinstance(candidate, scipy.stats.rv_continuous):
            continue
        try:
            # fit our data set against every probability distribution
            parameters = candidate.fit(data)
            negative_log_likelihood = candidate.nnlf(parameters, data)
        except (AttributeError, NotImplementedError, TypeError,
                ValueError, RuntimeError):
            # Deliberate best-effort: a distribution that cannot be fitted to
            # this sample is skipped so the remaining ones are still ranked.
            continue
        aicc = corrected_aic(negative_log_likelihood, data.size,
                             len(parameters))
        if np.isfinite(aicc):
            scores[name] = aicc

    return sorted(scores.items(), key=operator.itemgetter(1))


def main(argv=None):
    """Rank distributions for every data file named on the command line.

    Args:
        argv: list of file names; defaults to ``sys.argv[1:]``.

    Each file is loaded with ``np.loadtxt`` and ranked on its own, so scores
    from one file never carry over into the listing of another.
    """
    filenames = sys.argv[1:] if argv is None else argv
    for filename in filenames:
        print("processing %s" % filename)
        data = np.loadtxt(filename)
        for name, aicc in rank_distributions(data):
            print("{:15s} {:10.2f}".format(name, aicc))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment