Skip to content

Instantly share code, notes, and snippets.

@back2yes
Forked from paddymccrudden/Fleishman.py
Created February 3, 2022 17:45
Show Gist options
  • Save back2yes/fdb76f55f6d2c4989860d19f24a19525 to your computer and use it in GitHub Desktop.
Save back2yes/fdb76f55f6d2c4989860d19f24a19525 to your computer and use it in GitHub Desktop.
Generate data with given mean, standard deviation, skew, and kurtosis. Intended for monte carlo simulations with non normal distributions

Fleishman (Fleishman, A. (1978), “A Method for Simulating Non-normal Distributions,” Psychometrika, 43, 521–532.) gives a method of generating a distriubution with given skew and kurtois by making a linear combinination of Z, Z^2, and Z^3

These were implemented in SAS in

https://support.sas.com/content/dam/SAS/support/en/books/simulating-data-with-sas/65378_Appendix_D_Functions_for_Simulating_Data_by_Using_Fleishmans_Transformation.pdf

Here I present a python/numpy implementation.

We use "ekurt" and "ekurtosis" in this to specify that it is excess kurtosis rather that kurtosis. See https://en.wikipedia.org/wiki/Kurtosis for example. Also, the data set has been modified since the original in order to ensure that the conditions for a solution have been met. Notice that when you run the code on the sample set, the input set and the output set have the same description (ie: same mean, std, skew and kurt)

.92
2.79
1.35
7.3
1.41
1.39
.64
4.05
6.51
7.28
-4.5
2.38
6.84
1.67
.82
2.78
1.67
.81
.97
1.79
6.45
1.04
3.44
.79
1.43
1.69
1.53
1.76
3.1
1.43
1.24
4.79
2.02
1.33
2.34
2.45
2.38
2.48
2.36
.83
.91
1.74
3.95
3.77
.92
3.03
2.18
2.73
2.04
2.57
import numpy as np
from numpy.linalg import solve
import logging
logging.basicConfig(level = logging.DEBUG)
from scipy.stats import moment,norm
def fleishman(b, c, d):
"""calculate the variance, skew and kurtois of a Fleishman distribution
F = -c + bZ + cZ^2 + dZ^3, where Z ~ N(0,1)
"""
b2 = b * b
c2 = c * c
d2 = d * d
bd = b * d
var = b2 + 6*bd + 2*c2 + 15*d2
skew = 2 * c * (b2 + 24*bd + 105*d2 + 2)
ekurt = 24 * (bd + c2 * (1 + b2 + 28*bd) +
d2 * (12 + 48*bd + 141*c2 + 225*d2))
return (var, skew, ekurt)
def flfunc(b, c, d, skew, ekurtosis):
"""
Given the fleishman coefficients, and a target skew and kurtois
this function will have a root if the coefficients give the desired skew and ekurtosis
"""
x,y,z = fleishman(b,c,d)
return (x - 1, y - skew, z - ekurtosis)
def flderiv(b, c, d):
"""
The deriviative of the flfunc above
returns a matrix of partial derivatives
"""
b2 = b * b
c2 = c * c
d2 = d * d
bd = b * d
df1db = 2*b + 6*d
df1dc = 4*c
df1dd = 6*b + 30*d
df2db = 4*c * (b + 12*d)
df2dc = 2 * (b2 + 24*bd + 105*d2 + 2)
df2dd = 4 * c * (12*b + 105*d)
df3db = 24 * (d + c2 * (2*b + 28*d) + 48 * d**3)
df3dc = 48 * c * (1 + b2 + 28*bd + 141*d2)
df3dd = 24 * (b + 28*b * c2 + 2 * d * (12 + 48*bd +
141*c2 + 225*d2) + d2 * (48*b + 450*d))
return np.matrix([[df1db, df1dc, df1dd],
[df2db, df2dc, df2dd],
[df3db, df3dc, df3dd]])
def newton(a, b, c, skew, ekurtosis, max_iter=25, converge=1e-5):
"""Implements newtons method to find a root of flfunc."""
f = flfunc(a, b, c, skew, ekurtosis)
for i in range(max_iter):
if max(map(abs, f)) < converge:
break
J = flderiv(a, b, c)
delta = -solve(J, f)
(a, b, c) = delta + (a,b,c)
f = flfunc(a, b, c, skew, ekurtosis)
return (a, b, c)
def fleishmanic(skew, ekurt):
"""Find an initial estimate of the fleisman coefficients, to feed to newtons method"""
c1 = 0.95357 - 0.05679 * ekurt + 0.03520 * skew**2 + 0.00133 * ekurt**2
c2 = 0.10007 * skew + 0.00844 * skew**3
c3 = 0.30978 - 0.31655 * c1
logging.debug("inital guess {},{},{}".format(c1,c2,c3))
return (c1, c2, c3)
def fit_fleishman_from_sk(skew, ekurt):
"""Find the fleishman distribution with given skew and ekurtosis
mean =0 and stdev =1
Returns None if no such distribution can be found
"""
if ekurt < -1.13168 + 1.58837 * skew**2:
return None
a, b, c = fleishmanic(skew, ekurt)
coef = newton(a, b, c, skew, ekurt)
return(coef)
def fit_fleishman_from_standardised_data(data):
"""Fit a fleishman distribution to standardised data."""
skew = moment(data,3)
ekurt = moment(data,4) - 3.0 # excess kurtosis
coeff = fit_fleishman_from_sk(skew,ekurt)
return coeff
"""Data in form of one number per line"""
with open('data.txt') as f:
data = np.array([float(line) for line in f])
mean = sum(data)/len(data)
std = moment(data,2)**0.5
std_data = (data - mean)/std
coeff = fit_fleishman_from_standardised_data(std_data)
print(coeff)
def describe(data):
"""Return summary statistics of as set of data"""
mean = sum(data)/len(data)
var = moment(data,2)
skew = moment(data,3)/var**1.5
kurt = moment(data,4)/var**2
return (mean,var,skew,kurt)
def generate_fleishman(a,b,c,d,N=100):
"""Generate N data items from fleishman's distribution with given coefficents"""
Z = norm.rvs(size=N)
F = a + Z*(b +Z*(c+ Z*d))
return F
print(describe(data))
sim = (generate_fleishman(-coeff[1],*coeff,N=10000))*std+mean
print(describe(sim))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment