Last active
June 21, 2018 11:50
-
-
Save larsbratholm/a60eefb2411b373a965e11405f132eb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code is heavily based on a blog post by Andre Dietrich, currently
# found at http://www.aizac.info/simple-check-of-a-sample-against-80-distributions/
from __future__ import print_function | |
import scipy.stats | |
import warnings | |
import numpy as np | |
import sys | |
import operator | |
def corrected_aic(sample_size, n_params, negative_log_likelihood):
    """Return the small-sample corrected Akaike information criterion.

    Uses the same expression as the original script:
    ``2*n*k / (n - k - 1) + 2*nll``.

    Parameters
    ----------
    sample_size : int
        Number of observations ``n``.
    n_params : int
        Number of fitted parameters ``k``.
    negative_log_likelihood : float
        Negative log-likelihood of the data under the fitted parameters.

    Returns
    -------
    float
        The score, or ``nan`` when ``n - k - 1 <= 0``; there the correction
        term would divide by zero or turn into a negative penalty.
    """
    denominator = sample_size - n_params - 1
    if denominator <= 0:
        return float("nan")
    # Float literals keep this a true division under Python 2 as well.
    penalty = 2.0 * sample_size * n_params / denominator
    return penalty + 2.0 * negative_log_likelihood


def fit_distributions(data, names=None):
    """Fit ``data`` against objects in ``scipy.stats`` and score each fit.

    Parameters
    ----------
    data : numpy.ndarray
        Sample to fit; only ``data.size`` and the ``fit``/``nnlf`` calls
        use it.
    names : iterable of str, optional
        Attribute names of ``scipy.stats`` to try. Defaults to every name
        in ``dir(scipy.stats)``.

    Returns
    -------
    dict
        Maps each name whose fit produced a finite score to that score.
    """
    if names is None:
        names = dir(scipy.stats)

    scores = {}
    for name in names:
        # Plain attribute lookup; no need to eval() a constructed string.
        candidate = getattr(scipy.stats, name, None)
        if not callable(candidate):
            continue
        try:
            # fit our data set against every probability distribution
            parameters = candidate.fit(data)
            nll = candidate.nnlf(parameters, data)
            score = corrected_aic(data.size, len(parameters), nll)
        except (AttributeError, NotImplementedError, TypeError,
                ValueError, RuntimeError):
            # Best-effort scan: anything that is not a fittable distribution,
            # or whose fit fails on this sample, is skipped rather than
            # aborting the whole run.
            continue
        if np.isfinite(score):
            scores[name] = score
    return scores


def main(argv=None):
    """Rank distributions by AICc for every data file named in ``argv``.

    Parameters
    ----------
    argv : list of str, optional
        File names readable by ``numpy.loadtxt``. Defaults to
        ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    # just for suppressing warnings
    warnings.simplefilter('ignore')

    for filename in argv:
        print("processing %s" % filename)
        data = np.loadtxt(filename)

        # Scores are rebuilt for each file so one file's results cannot
        # leak into the ranking printed for the next.
        scores = fit_distributions(data)

        # Sort, best (lowest) score first
        ranked = sorted(scores.items(), key=operator.itemgetter(1))
        for name, score in ranked:
            print("{:15s} {:10.2f}".format(name, score))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment