Skip to content

Instantly share code, notes, and snippets.

@robdmc
Last active January 8, 2016 18:07
Show Gist options
  • Save robdmc/ea319344bfb5b5d0d171 to your computer and use it in GitHub Desktop.
Save robdmc/ea319344bfb5b5d0d171 to your computer and use it in GitHub Desktop.
Abstractions for working with lognormal distributions
"""
This collection of functions provides convenience abstractions
for working with lognormal distributions. Sums of lognormals result
in a distribution with no closed form solution. The functions provided
here assume the Fenton-Wilkinson approximation for such sums.
Running this google search should get you a good reference paper:
cobb "Approximating the Distribution of a Sum of Log-normal Random Variables"
Note that this package looks like an interesting add-on to the scipy
stats api
http://phobson.github.io/paramnormal/tutorial/overview.html
"""
%matplotlib inline
import scipy as scp
from scipy import stats
import pylab as pl
import numpy as np
import seaborn as sns
sns.set_style('darkgrid')
sns.set_context('talk')
def moments_from_data(x):
"""
Return the emperical moments from a dataset
Inputs:
x: iterable of data values
Returns:
mean, variance
"""
return np.mean(x), np.var(x)
def params_from_moments(mean, variance):
"""
Computes lognormal parameters from distribution moments.
Inputs:
mean: The desired first moment of the lognormal distribution
variance: The desired second moment of the variance
Returns:
mu: The mean of the log variates
sigma: the standard deviation of the log variates
"""
sigma2 = np.log(1. + variance / (mean ** 2.))
mu = np.log(mean) - 0.5 * sigma2
return mu, np.sqrt(sigma2)
def moments_from_params(mu, sigma):
"""
Computes moments of a lognormal distribution given the log variate mu and sigma
Inputs:
mu: the mean of the log variates
sigma: the standard deviation of the log variates
Returns:
mean: the first moment of the lognormal distribution
variance: the second moment of the lognormal distribution
"""
mean = np.exp(mu + 0.5 * sigma ** 2.)
variance = (np.exp(sigma ** 2.) - 1.) * np.exp(2. * mu + sigma ** 2.)
return mean, variance
def params_for_sum_of_params(N, mu0, sigma0):
"""
Returns the lognormal parameters that approximate the distribution
obtained by summing N lognormal distributions each with params mu0 and sigma0.
This is only an approximation, as there is no closed form solution for the
actual distribution.
Inputs:
N: The number of longormal variables that should be summed together
mu0: the mean log variate for the constituent variables
sigma0: the standard deviation of the log variates
Returns:
mu: the mean log variate of the approximate sum distribution
sigma: the standard devation for the log variates of the sum distribution
"""
sigma2 = np.log(((np.exp(sigma0 ** 2.) - 1.) / N) + 1.)
mu = np.log(N * np.exp(mu0)) + 0.5 * (sigma0 ** 2. - sigma2)
return mu, np.sqrt(sigma2)
def moments_for_sum_of_moments(N, mean, variance):
"""
Returns the moments for a sum of lognormal distributions each having
the equal moments.
Inputs:
N: The number of lognormal distributions that should be summed together
mean: The first moment of the constituent lognormals that enter the sum
variance: The second moment of the consituent lognormals that enter the sum
Returns:
mean_of_sum: The first moment of the approximate sum distribution
variance_of_sum: The second of the approximate sum distribution
"""
mu0, sigma0 = params_from_moments(mean, variance)
mu, sigma = params_for_sum_of_params(N, mu0, sigma0)
return moments_from_params(mu, sigma)
def lognormal_from_params(mu, sigma, offset=0):
"""
The scipy implementation of the lognormal distribution is confusing.
This is just a wrapper with easily understood arguments.
Inputs:
mu = The mean log variate
sigma = The standard deviation of the log variates
offset = a number allowing a shift in the coordinate system
Returns:
A scipy random number object with methods like pdf(), cdf(), and ppf()
"""
return stats.lognorm(s=sigma, loc=offset, scale=np.exp(mu))
def lognormal_from_moments(mean, variance):
"""
The scipy implementation of the lognormal distribution is confusing.
This is just a wrapper with easily understood arguments.
Inputs:
mean = The first moment of the desired lognormal distribution
variance = The second moment of the desired lognormal distribution
offset = a number allowing a shift in the coordinate system
Returns:
A scipy random number object with methods like pdf(), cdf(), and ppf()
"""
mu, sigma = params_from_moments(mean, variance)
return lognormal_from_params(mu, sigma)
def test_plots():
# create a support
x = np.logspace(-1, 3, 3000)
# get a scipy random_number object for the lognormal dist
rv = lognormal_from_moments(20, 6 ** 2)
# evaluate the pdf along the support and plot
y = rv.pdf(x)
yc = rv.cdf(x)
pl.plot(x, y, '-')
# calculate the moments
M1, error = scp.integrate.quad(lambda x: x * rv.pdf(x), 1e-2, 1e4)
M2, error = scp.integrate.quad(lambda x: x * x * rv.pdf(x), 1e-2, 1e4)
# define a delta in cdf to corresponding the probability under 1std of a normal dist
delta = .6827 / 2.
# set plot axis to a nice value
ax = pl.gca()
x_lim = list(rv.ppf([.001, .990]))
ax.set_xlim(*x_lim)
# print the uncertainty boundaries
print('uncertainty boundaries', rv.ppf([.5 + delta, .5, .5 - delta]))
# print the uncertainty radius
print('uncertainty radius', np.diff(rv.ppf([.5 - delta, .5 + delta])) / 2.)
# print the mean and standard devation
print('mean standard_devation', M1, M2 - M1 ** 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment