|
import numpy as np |
|
from numpy.linalg import solve |
|
import logging |
|
logging.basicConfig(level = logging.DEBUG) |
|
from scipy.stats import moment,norm |
|
|
|
def fleishman(b, c, d): |
|
"""calculate the variance, skew and kurtois of a Fleishman distribution |
|
F = -c + bZ + cZ^2 + dZ^3, where Z ~ N(0,1) |
|
""" |
|
b2 = b * b |
|
c2 = c * c |
|
d2 = d * d |
|
bd = b * d |
|
var = b2 + 6*bd + 2*c2 + 15*d2 |
|
skew = 2 * c * (b2 + 24*bd + 105*d2 + 2) |
|
kurt = 24 * (bd + c2 * (1 + b2 + 28*bd) + |
|
d2 * (12 + 48*bd + 141*c2 + 225*d2)) |
|
return (var, skew, kurt) |
|
|
|
def flfunc(b, c, d, skew, kurtosis): |
|
""" |
|
Given the fleishman coefficients, and a target skew and kurtois |
|
this function will have a root if the coefficients give the desired skew and kurtosis |
|
""" |
|
x,y,z = fleishman(b,c,d) |
|
return (x - 1, y - skew, z - kurtosis) |
|
|
|
def flderiv(b, c, d): |
|
""" |
|
The deriviative of the flfunc above |
|
returns a matrix of partial derivatives |
|
""" |
|
b2 = b * b |
|
c2 = c * c |
|
d2 = d * d |
|
bd = b * d |
|
df1db = 2*b + 6*d |
|
df1dc = 4*c |
|
df1dd = 6*b + 30*d |
|
df2db = 4*c * (b + 12*d) |
|
df2dc = 2 * (b2 + 24*bd + 105*d2 + 2) |
|
df2dd = 4 * c * (12*b + 105*d) |
|
df3db = 24 * (d + c2 * (2*b + 28*d) + 48 * d**3) |
|
df3dc = 48 * c * (1 + b2 + 28*bd + 141*d2) |
|
df3dd = 24 * (b + 28*b * c2 + 2 * d * (12 + 48*bd + |
|
141*c2 + 225*d2) + d2 * (48*b + 450*d)) |
|
return np.matrix([[df1db, df1dc, df1dd], |
|
[df2db, df2dc, df2dd], |
|
[df3db, df3dc, df3dd]]) |
|
|
|
def newton(a, b, c, skew, kurtosis, max_iter=25, converge=1e-5): |
|
"""Implements newtons method to find a root of flfunc.""" |
|
f = flfunc(a, b, c, skew, kurtosis) |
|
for i in range(max_iter): |
|
if max(map(abs, f)) < converge: |
|
break |
|
J = flderiv(a, b, c) |
|
delta = -solve(J, f) |
|
(a, b, c) = delta + (a,b,c) |
|
f = flfunc(a, b, c, skew, kurtosis) |
|
return (a, b, c) |
|
|
|
|
|
def fleishmanic(skew, kurt): |
|
"""Find an initial estimate of the fleisman coefficients, to feed to newtons method""" |
|
c1 = 0.95357 - 0.05679 * kurt + 0.03520 * skew**2 + 0.00133 * kurt**2 |
|
c2 = 0.10007 * skew + 0.00844 * skew**3 |
|
c3 = 0.30978 - 0.31655 * c1 |
|
logging.debug("inital guess {},{},{}".format(c1,c2,c3)) |
|
return (c1, c2, c3) |
|
|
|
|
|
def fit_fleishman_from_sk(skew, kurt): |
|
"""Find the fleishman distribution with given skew and kurtosis |
|
mean =0 and stdev =1 |
|
|
|
Returns None if no such distribution can be found |
|
""" |
|
if kurt < -1.13168 + 1.58837 * skew**2: |
|
return None |
|
a, b, c = fleishmanic(skew, kurt) |
|
coef = newton(a, b, c, skew, kurt) |
|
return(coef) |
|
|
|
def fit_fleishman_from_standardised_data(data): |
|
"""Fit a fleishman distribution to standardised data.""" |
|
skew = moment(data,3) |
|
kurt = moment(data,4) |
|
coeff = fit_fleishman_from_sk(skew,kurt) |
|
return coeff |
|
|
|
"""Data in form of one number per line""" |
|
with open('data.txt') as f: |
|
data = np.array([float(line) for line in f]) |
|
mean = sum(data)/len(data) |
|
std = moment(data,2)**0.5 |
|
std_data = (data - mean)/std |
|
|
|
coeff = fit_fleishman_from_standardised_data(std_data) |
|
print(coeff) |
|
|
|
def describe(data): |
|
"""Return summary statistics of as set of data""" |
|
mean = sum(data)/len(data) |
|
var = moment(data,2) |
|
skew = moment(data,3)/var**1.5 |
|
kurt = moment(data,4)/var**2 |
|
return (mean,var,skew,kurt) |
|
|
|
def generate_fleishman(a,b,c,d,N=100): |
|
"""Generate N data items from fleishman's distribution with given coefficents""" |
|
Z = norm.rvs(size=N) |
|
F = a + Z*(b +Z*(c+ Z*d)) |
|
return F |
|
|
|
print(describe(data)) |
|
sim = (generate_fleishman(-coeff[1],*coeff,N=10000))*std+mean |
|
print(describe(sim)) |
Thatnks @zeimusu. I've just forked and updated code with minimal changes to make it work. It looks like pull requests don't exist here, so feel free to copy and update your code. Thanks so much for the implementation. It is very easy to use.