robdmc/log_normal_abstractions.py

## log_normal_abstractions.py
"""
This collection of functions provides convenience abstractions
for working with lognormal distributions.  Sums of lognormals result
in a distribution with no closed form solution.  The functions provided
here assume the Fenton-Wilkinson approximation for such sums.

Running this google search should get you a good reference paper:
  cobb "Approximating the Distribution of a Sum of Log-normal Random Variables"

Note that this package looks like an interesting add-on to the scipy
stats api
http://phobson.github.io/paramnormal/tutorial/overview.html
"""

%matplotlib inline
import scipy as scp
from scipy import stats
import pylab as pl
import numpy as np
import seaborn as sns
sns.set_style('darkgrid')
sns.set_context('talk')

def moments_from_data(x):
    """
    Return the emperical moments from a dataset
    Inputs:
        x: iterable of data values
    Returns:
        mean, variance
    """
    return np.mean(x), np.var(x)

def params_from_moments(mean, variance):
    """
    Computes lognormal parameters from distribution moments.
    Inputs:
        mean: The desired first moment of the lognormal distribution
        variance: The desired second moment of the variance
    Returns:
        mu: The mean of the log variates
        sigma: the standard deviation of the log variates
    """
    sigma2 = np.log(1. + variance / (mean ** 2.))
    mu = np.log(mean) - 0.5 * sigma2
    return mu, np.sqrt(sigma2)

def moments_from_params(mu, sigma):
    """
    Computes moments of a lognormal distribution given the log variate mu and sigma
    Inputs:
        mu: the mean of the log variates
        sigma: the standard deviation of the log variates
    Returns:
        mean: the first moment of the lognormal distribution
        variance: the second moment of the lognormal distribution
    """
    mean = np.exp(mu + 0.5 * sigma ** 2.)
    variance = (np.exp(sigma ** 2.) - 1.) * np.exp(2. * mu + sigma ** 2.)
    return mean, variance

def params_for_sum_of_params(N, mu0, sigma0):
    """
    Returns the lognormal parameters that approximate the distribution
    obtained by summing N lognormal distributions each with params mu0 and sigma0.
    This is only an approximation, as there is no closed form solution for the
    actual distribution.
    Inputs:
        N: The number of longormal variables that should be summed together
        mu0: the mean log variate for the constituent variables
        sigma0: the standard deviation of the log variates
    Returns:
        mu: the mean log variate of the approximate sum distribution
        sigma: the standard devation for the log variates of the sum distribution
    """
    sigma2 = np.log(((np.exp(sigma0 ** 2.) - 1.) / N) + 1.)
    mu = np.log(N * np.exp(mu0)) + 0.5 * (sigma0 ** 2. - sigma2)
    return mu, np.sqrt(sigma2)


def moments_for_sum_of_moments(N, mean, variance):
    """
    Returns the moments for a sum of lognormal distributions each having
    the equal moments.
    Inputs:
        N: The number of lognormal distributions that should be summed together
        mean: The first moment of the constituent lognormals that enter the sum
        variance: The second moment of the consituent lognormals that enter the sum
    Returns:
        mean_of_sum: The first moment of the approximate sum distribution
        variance_of_sum: The second of the approximate sum distribution


    """
    mu0, sigma0 = params_from_moments(mean, variance)
    mu, sigma = params_for_sum_of_params(N, mu0, sigma0)
    return moments_from_params(mu, sigma)


def lognormal_from_params(mu, sigma, offset=0):
    """
    The scipy implementation of the lognormal distribution is confusing.
    This is just a wrapper with easily understood arguments.
    Inputs:
        mu = The mean log variate
        sigma = The standard deviation of the log variates
        offset = a number allowing a shift in the coordinate system
    Returns:
        A scipy random number object with methods like pdf(), cdf(), and ppf()
    """
    return stats.lognorm(s=sigma, loc=offset, scale=np.exp(mu))


def lognormal_from_moments(mean, variance):
    """
    The scipy implementation of the lognormal distribution is confusing.
    This is just a wrapper with easily understood arguments.
    Inputs:
        mean = The first moment of the desired lognormal distribution
        variance = The second moment of the desired lognormal distribution
        offset = a number allowing a shift in the coordinate system
    Returns:
        A scipy random number object with methods like pdf(), cdf(), and ppf()
    """
    mu, sigma = params_from_moments(mean, variance)
    return lognormal_from_params(mu, sigma)

def test_plots():
    # create a support
    x = np.logspace(-1, 3, 3000)

    # get a scipy random_number object for the lognormal dist
    rv = lognormal_from_moments(20, 6 ** 2)

    # evaluate the pdf along the support and plot
    y = rv.pdf(x)
    yc = rv.cdf(x)
    pl.plot(x, y, '-')

    # calculate the moments
    M1, error = scp.integrate.quad(lambda x: x * rv.pdf(x), 1e-2, 1e4)
    M2, error = scp.integrate.quad(lambda x: x * x * rv.pdf(x), 1e-2, 1e4)

    # define a delta in cdf to corresponding the probability under 1std of a normal dist
    delta = .6827 / 2.

    # set plot axis to a nice value
    ax = pl.gca()
    x_lim = list(rv.ppf([.001, .990]))
    ax.set_xlim(*x_lim)

    # print the uncertainty boundaries
    print('uncertainty boundaries', rv.ppf([.5 + delta, .5,  .5 - delta]))

    # print the uncertainty radius
    print('uncertainty radius', np.diff(rv.ppf([.5 - delta, .5 + delta])) / 2.)

    # print the mean and standard devation
    print('mean standard_devation', M1, M2 - M1 ** 2)
	"""
	This collection of functions provides convenience abstractions
	for working with lognormal distributions. Sums of lognormals result
	in a distribution with no closed form solution. The functions provided
	here assume the Fenton-Wilkinson approximation for such sums.

	Running this google search should get you a good reference paper:
	cobb "Approximating the Distribution of a Sum of Log-normal Random Variables"

	Note that this package looks like an interesting add-on to the scipy
	stats api
	http://phobson.github.io/paramnormal/tutorial/overview.html
	"""

	%matplotlib inline
	import scipy as scp
	from scipy import stats
	import pylab as pl
	import numpy as np
	import seaborn as sns
	sns.set_style('darkgrid')
	sns.set_context('talk')

	def moments_from_data(x):
	"""
	Return the emperical moments from a dataset
	Inputs:
	x: iterable of data values
	Returns:
	mean, variance
	"""
	return np.mean(x), np.var(x)

	def params_from_moments(mean, variance):
	"""
	Computes lognormal parameters from distribution moments.
	Inputs:
	mean: The desired first moment of the lognormal distribution
	variance: The desired second moment of the variance
	Returns:
	mu: The mean of the log variates
	sigma: the standard deviation of the log variates
	"""
	sigma2 = np.log(1. + variance / (mean ** 2.))
	mu = np.log(mean) - 0.5 * sigma2
	return mu, np.sqrt(sigma2)

	def moments_from_params(mu, sigma):
	"""
	Computes moments of a lognormal distribution given the log variate mu and sigma
	Inputs:
	mu: the mean of the log variates
	sigma: the standard deviation of the log variates
	Returns:
	mean: the first moment of the lognormal distribution
	variance: the second moment of the lognormal distribution
	"""
	mean = np.exp(mu + 0.5 * sigma ** 2.)
	variance = (np.exp(sigma ** 2.) - 1.) * np.exp(2. * mu + sigma ** 2.)
	return mean, variance

	def params_for_sum_of_params(N, mu0, sigma0):
	"""
	Returns the lognormal parameters that approximate the distribution
	obtained by summing N lognormal distributions each with params mu0 and sigma0.
	This is only an approximation, as there is no closed form solution for the
	actual distribution.
	Inputs:
	N: The number of longormal variables that should be summed together
	mu0: the mean log variate for the constituent variables
	sigma0: the standard deviation of the log variates
	Returns:
	mu: the mean log variate of the approximate sum distribution
	sigma: the standard devation for the log variates of the sum distribution
	"""
	sigma2 = np.log(((np.exp(sigma0 ** 2.) - 1.) / N) + 1.)
	mu = np.log(N * np.exp(mu0)) + 0.5 * (sigma0 ** 2. - sigma2)
	return mu, np.sqrt(sigma2)


	def moments_for_sum_of_moments(N, mean, variance):
	"""
	Returns the moments for a sum of lognormal distributions each having
	the equal moments.
	Inputs:
	N: The number of lognormal distributions that should be summed together
	mean: The first moment of the constituent lognormals that enter the sum
	variance: The second moment of the consituent lognormals that enter the sum
	Returns:
	mean_of_sum: The first moment of the approximate sum distribution
	variance_of_sum: The second of the approximate sum distribution


	"""
	mu0, sigma0 = params_from_moments(mean, variance)
	mu, sigma = params_for_sum_of_params(N, mu0, sigma0)
	return moments_from_params(mu, sigma)


	def lognormal_from_params(mu, sigma, offset=0):
	"""
	The scipy implementation of the lognormal distribution is confusing.
	This is just a wrapper with easily understood arguments.
	Inputs:
	mu = The mean log variate
	sigma = The standard deviation of the log variates
	offset = a number allowing a shift in the coordinate system
	Returns:
	A scipy random number object with methods like pdf(), cdf(), and ppf()
	"""
	return stats.lognorm(s=sigma, loc=offset, scale=np.exp(mu))


	def lognormal_from_moments(mean, variance):
	"""
	The scipy implementation of the lognormal distribution is confusing.
	This is just a wrapper with easily understood arguments.
	Inputs:
	mean = The first moment of the desired lognormal distribution
	variance = The second moment of the desired lognormal distribution
	offset = a number allowing a shift in the coordinate system
	Returns:
	A scipy random number object with methods like pdf(), cdf(), and ppf()
	"""
	mu, sigma = params_from_moments(mean, variance)
	return lognormal_from_params(mu, sigma)

	def test_plots():
	# create a support
	x = np.logspace(-1, 3, 3000)

	# get a scipy random_number object for the lognormal dist
	rv = lognormal_from_moments(20, 6 ** 2)

	# evaluate the pdf along the support and plot
	y = rv.pdf(x)
	yc = rv.cdf(x)
	pl.plot(x, y, '-')

	# calculate the moments
	M1, error = scp.integrate.quad(lambda x: x * rv.pdf(x), 1e-2, 1e4)
	M2, error = scp.integrate.quad(lambda x: x * x * rv.pdf(x), 1e-2, 1e4)

	# define a delta in cdf to corresponding the probability under 1std of a normal dist
	delta = .6827 / 2.

	# set plot axis to a nice value
	ax = pl.gca()
	x_lim = list(rv.ppf([.001, .990]))
	ax.set_xlim(*x_lim)

	# print the uncertainty boundaries
	print('uncertainty boundaries', rv.ppf([.5 + delta, .5, .5 - delta]))

	# print the uncertainty radius
	print('uncertainty radius', np.diff(rv.ppf([.5 - delta, .5 + delta])) / 2.)

	# print the mean and standard devation
	print('mean standard_devation', M1, M2 - M1 ** 2)