zeimusu/Fleishman.py

## readme.md

      
    Raw
  

              readme.md
            
          
    Fleishman (Fleishman, A. (1978), “A Method for Simulating Non-normal Distributions,” Psychometrika, 43, 521–532.)
gives a method of generating a distriubution with given skew and kurtois by making a linear combinination of Z, Z^2, and Z^3
These were implemented in SAS in
https://support.sas.com/content/dam/SAS/support/en/books/simulating-data-with-sas/65378_Appendix_D_Functions_for_Simulating_Data_by_Using_Fleishmans_Transformation.pdf
Here I present a python/numpy implementation

  
## data.txt
.92
2.79
1.35
7.3
1.41
1.39
.64
4.05
6.51
7.28
10.79
2.38
6.84
1.67
.82
2.78
1.67
.81
.97
1.79
6.45
1.04
3.44
.79
1.43
1.69
1.53
1.76
3.1
1.43
1.24
4.79
2.02
1.33
2.34
2.45
2.38
2.48
2.36
.83
.91
1.74
3.95
3.77
.92
3.03
2.18
2.73
2.04
2.57

## Fleishman.py
import numpy as np
from numpy.linalg import solve
import logging
logging.basicConfig(level = logging.DEBUG)
from scipy.stats import moment,norm

def fleishman(b, c, d):
    """calculate the variance, skew and kurtois of a Fleishman distribution
    F = -c + bZ + cZ^2 + dZ^3, where Z ~ N(0,1)
    """
    b2 = b * b
    c2 = c * c
    d2 = d * d
    bd = b * d
    var = b2 + 6*bd + 2*c2 + 15*d2
    skew = 2 * c * (b2 + 24*bd + 105*d2 + 2)
    kurt = 24 * (bd + c2 * (1 + b2 + 28*bd) +
                 d2 * (12 + 48*bd + 141*c2 + 225*d2))
    return (var, skew, kurt)

def flfunc(b, c, d, skew, kurtosis):
    """
    Given the fleishman coefficients, and a target skew and kurtois
    this function will have a root if the coefficients give the desired skew and kurtosis
    """
    x,y,z = fleishman(b,c,d)
    return (x - 1, y - skew, z - kurtosis)

def flderiv(b, c, d):
    """
    The deriviative of the flfunc above
    returns a matrix of partial derivatives
    """
    b2 = b * b
    c2 = c * c
    d2 = d * d
    bd = b * d
    df1db = 2*b + 6*d
    df1dc = 4*c
    df1dd = 6*b + 30*d
    df2db = 4*c * (b + 12*d)
    df2dc = 2 * (b2 + 24*bd + 105*d2 + 2)
    df2dd = 4 * c * (12*b + 105*d)
    df3db = 24 * (d + c2 * (2*b + 28*d) + 48 * d**3)
    df3dc = 48 * c * (1 + b2 + 28*bd + 141*d2)
    df3dd = 24 * (b + 28*b * c2 + 2 * d * (12 + 48*bd +
                  141*c2 + 225*d2) + d2 * (48*b + 450*d))
    return np.matrix([[df1db, df1dc, df1dd],
                      [df2db, df2dc, df2dd],
                      [df3db, df3dc, df3dd]])

def newton(a, b, c, skew, kurtosis, max_iter=25, converge=1e-5):
    """Implements newtons method to find a root of flfunc."""
    f = flfunc(a, b, c, skew, kurtosis)
    for i in range(max_iter):
        if max(map(abs, f)) < converge:
            break
        J = flderiv(a, b, c)
        delta = -solve(J, f)
        (a, b, c) = delta + (a,b,c)
        f = flfunc(a, b, c, skew, kurtosis)
    return (a, b, c)


def fleishmanic(skew, kurt):
    """Find an initial estimate of the fleisman coefficients, to feed to newtons method"""
    c1 = 0.95357 - 0.05679 * kurt + 0.03520 * skew**2 + 0.00133 * kurt**2
    c2 = 0.10007 * skew + 0.00844 * skew**3
    c3 = 0.30978 - 0.31655 * c1
    logging.debug("inital guess {},{},{}".format(c1,c2,c3))
    return (c1, c2, c3)


def fit_fleishman_from_sk(skew, kurt):
    """Find the fleishman distribution with given skew and kurtosis
    mean =0 and stdev =1

    Returns None if no such distribution can be found
    """
    if kurt < -1.13168 + 1.58837 * skew**2:
        return None
    a, b, c = fleishmanic(skew, kurt)
    coef = newton(a, b, c, skew, kurt)
    return(coef)

def fit_fleishman_from_standardised_data(data):
    """Fit a fleishman distribution to standardised data."""
    skew = moment(data,3)
    kurt = moment(data,4)
    coeff = fit_fleishman_from_sk(skew,kurt)
    return coeff

"""Data in form of one number per line"""
with open('data.txt') as f:
    data = np.array([float(line) for line in f])
mean = sum(data)/len(data)
std = moment(data,2)**0.5
std_data = (data - mean)/std

coeff = fit_fleishman_from_standardised_data(std_data)
print(coeff)

def describe(data):
    """Return summary statistics of as set of data"""
    mean = sum(data)/len(data)
    var = moment(data,2)
    skew = moment(data,3)/var**1.5
    kurt = moment(data,4)/var**2
    return (mean,var,skew,kurt)

def generate_fleishman(a,b,c,d,N=100):
    """Generate N data items from fleishman's distribution with given coefficents"""
    Z = norm.rvs(size=N)
    F = a + Z*(b +Z*(c+ Z*d))
    return F

print(describe(data))
sim = (generate_fleishman(-coeff[1],*coeff,N=10000))*std+mean
print(describe(sim))
	.92
	2.79
	1.35
	7.3
	1.41
	1.39
	.64
	4.05
	6.51
	7.28
	10.79
	2.38
	6.84
	1.67
	.82
	2.78
	1.67
	.81
	.97
	1.79
	6.45
	1.04
	3.44
	.79
	1.43
	1.69
	1.53
	1.76
	3.1
	1.43
	1.24
	4.79
	2.02
	1.33
	2.34
	2.45
	2.38
	2.48
	2.36
	.83
	.91
	1.74
	3.95
	3.77
	.92
	3.03
	2.18
	2.73
	2.04
	2.57
	import numpy as np
	from numpy.linalg import solve
	import logging
	logging.basicConfig(level = logging.DEBUG)
	from scipy.stats import moment,norm

	def fleishman(b, c, d):
	"""calculate the variance, skew and kurtois of a Fleishman distribution
	F = -c + bZ + cZ^2 + dZ^3, where Z ~ N(0,1)
	"""
	b2 = b * b
	c2 = c * c
	d2 = d * d
	bd = b * d
	var = b2 + 6bd + 2c2 + 15*d2
	skew = 2 * c * (b2 + 24bd + 105d2 + 2)
	kurt = 24 * (bd + c2 * (1 + b2 + 28*bd) +
	d2 * (12 + 48bd + 141c2 + 225*d2))
	return (var, skew, kurt)

	def flfunc(b, c, d, skew, kurtosis):
	"""
	Given the fleishman coefficients, and a target skew and kurtois
	this function will have a root if the coefficients give the desired skew and kurtosis
	"""
	x,y,z = fleishman(b,c,d)
	return (x - 1, y - skew, z - kurtosis)

	def flderiv(b, c, d):
	"""
	The deriviative of the flfunc above
	returns a matrix of partial derivatives
	"""
	b2 = b * b
	c2 = c * c
	d2 = d * d
	bd = b * d
	df1db = 2b + 6d
	df1dc = 4*c
	df1dd = 6b + 30d
	df2db = 4c (b + 12*d)
	df2dc = 2 * (b2 + 24bd + 105d2 + 2)
	df2dd = 4 * c * (12b + 105d)
	df3db = 24 * (d + c2 * (2b + 28d) + 48 * d**3)
	df3dc = 48 * c * (1 + b2 + 28bd + 141d2)
	df3dd = 24 * (b + 28b c2 + 2 * d * (12 + 48*bd +
	141c2 + 225d2) + d2 * (48b + 450d))
	return np.matrix([[df1db, df1dc, df1dd],
	[df2db, df2dc, df2dd],
	[df3db, df3dc, df3dd]])

	def newton(a, b, c, skew, kurtosis, max_iter=25, converge=1e-5):
	"""Implements newtons method to find a root of flfunc."""
	f = flfunc(a, b, c, skew, kurtosis)
	for i in range(max_iter):
	if max(map(abs, f)) < converge:
	break
	J = flderiv(a, b, c)
	delta = -solve(J, f)
	(a, b, c) = delta + (a,b,c)
	f = flfunc(a, b, c, skew, kurtosis)
	return (a, b, c)


	def fleishmanic(skew, kurt):
	"""Find an initial estimate of the fleisman coefficients, to feed to newtons method"""
	c1 = 0.95357 - 0.05679 * kurt + 0.03520 * skew*2 + 0.00133 kurt**2
	c2 = 0.10007 * skew + 0.00844 * skew**3
	c3 = 0.30978 - 0.31655 * c1
	logging.debug("inital guess {},{},{}".format(c1,c2,c3))
	return (c1, c2, c3)


	def fit_fleishman_from_sk(skew, kurt):
	"""Find the fleishman distribution with given skew and kurtosis
	mean =0 and stdev =1

	Returns None if no such distribution can be found
	"""
	if kurt < -1.13168 + 1.58837 * skew**2:
	return None
	a, b, c = fleishmanic(skew, kurt)
	coef = newton(a, b, c, skew, kurt)
	return(coef)

	def fit_fleishman_from_standardised_data(data):
	"""Fit a fleishman distribution to standardised data."""
	skew = moment(data,3)
	kurt = moment(data,4)
	coeff = fit_fleishman_from_sk(skew,kurt)
	return coeff

	"""Data in form of one number per line"""
	with open('data.txt') as f:
	data = np.array([float(line) for line in f])
	mean = sum(data)/len(data)
	std = moment(data,2)**0.5
	std_data = (data - mean)/std

	coeff = fit_fleishman_from_standardised_data(std_data)
	print(coeff)

	def describe(data):
	"""Return summary statistics of as set of data"""
	mean = sum(data)/len(data)
	var = moment(data,2)
	skew = moment(data,3)/var**1.5
	kurt = moment(data,4)/var**2
	return (mean,var,skew,kurt)

	def generate_fleishman(a,b,c,d,N=100):
	"""Generate N data items from fleishman's distribution with given coefficents"""
	Z = norm.rvs(size=N)
	F = a + Z(b +Z(c+ Z*d))
	return F

	print(describe(data))
	sim = (generate_fleishman(-coeff[1],coeff,N=10000))std+mean
	print(describe(sim))