Created
November 9, 2011 21:30
-
-
Save stober/1353114 to your computer and use it in GitHub Desktop.
Normal Distribution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
Author: Jeremy M. Stober | |
Program: NORMAL.PY | |
Date: Friday, July 7, 2011 | |
Description: Manipulating normal distributions. | |
""" | |
import numpy as np | |
import numpy.linalg as la | |
import numpy.random as npr | |
import random as pr | |
npa = np.array | |
ix = np.ix_ # urgh - sometimes numpy is ugly! | |
class Normal(object): | |
""" | |
A class for storing the parameters of a multivariate normal | |
distribution. Supports evaluation, sampling, conditioning and | |
marginalization. | |
""" | |
def __init__(self, dim, mu = None, sigma = None, data = None, | |
parent = None, cond = None, margin = None): | |
""" | |
Initialize a normal distribution. | |
Parameters | |
---------- | |
dim : int | |
Number of dimensions (e.g. number of components in the mu parameter). | |
mu : array, optional | |
The mean of the normal distribution. | |
sigma : array, optional | |
The covariance matrix of the normal distribution. | |
data : array, optional | |
If provided, the parameters of the distribution will be estimated from the data. Rows are observations, columns are components. | |
parent : Normal, optional | |
A reference to a parent distribution that was marginalized or conditioned. | |
cond : dict, optional | |
A dict of parameters describing how the parent distribution was conditioned. | |
margin : dict, optional | |
A dict of parameters describing how the parent distribution was marginalized. | |
Examples | |
-------- | |
>>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) | |
>>> print x | |
[ 0.1 0.7] | |
[[ 0.6 0.4] | |
[ 0.4 0.6]] | |
To condition on a value (and index): | |
>>> condx = x.condition([0],0.1) | |
>>> print condx | |
[ 0.7] | |
[[ 0.33333333]] | |
""" | |
self.dim = dim # full data dimension | |
if not mu is None and not sigma is None: | |
pass | |
elif not data is None: | |
# estimate the parameters from data - rows are samples, cols are variables | |
mu, sigma = self.estimate(data) | |
else: | |
# generate random means | |
mu = npr.randn(dim) | |
sigma = np.eye(dim) | |
self.cond = cond | |
self.margin = margin | |
self.parent = parent | |
self.update(npa(mu),npa(sigma)) | |
def update(self, mu, sigma): | |
""" | |
Update the distribution with new parameters. | |
Parameters | |
---------- | |
mu : array | |
The new mean parameters. | |
sigma : array | |
The new covariance matrix. | |
Example | |
------- | |
>>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) | |
>>> print x | |
[ 0.1 0.7] | |
[[ 0.6 0.4] | |
[ 0.4 0.6]] | |
>>> x.update(np.array([0.0,0.0]), x.E) | |
>>> print x | |
[ 0.0 0.0] | |
[[ 0.6 0.4] | |
[ 0.4 0.6]] | |
""" | |
self.mu = mu | |
self.E = sigma | |
det = None | |
if self.dim == 1: | |
self.A = 1.0 / self.E | |
det = np.fabs(self.E[0]) | |
else: | |
self.A = la.inv(self.E) # precision matrix | |
det = np.fabs(la.det(self.E)) | |
self.factor = (2.0 * np.pi)**(self.dim / 2.0) * (det)**(0.5) | |
def __str__(self): | |
return "%s\n%s" % (str(self.mu), str(self.E)) | |
def mean(self): | |
return self.mu | |
def covariance(self): | |
return self.E | |
def pdf(self, x): | |
dx = x - self.mu | |
A = self.A | |
fE = self.factor | |
return np.exp(-0.5 * np.dot(np.dot(dx,A),dx)) / fE | |
def pdf_mesh(self, x, y): | |
# for 2d meshgrids | |
# use matplotlib.mlab.bivariate_normal -- faster (vectorized) | |
z = np.zeros((len(y),len(x))) | |
for (i,v) in enumerate(x): | |
for (j,w) in enumerate(y): | |
z[j,i] = self.pdf([v,w]) | |
return z | |
def simulate(self, ndata = 100): | |
""" | |
Draw pts from the distribution. | |
""" | |
return npr.multivariate_normal(self.mu, self.E, ndata) | |
def estimate(self, data): | |
mu = np.mean(data, axis=0) | |
sigma = np.cov(data, rowvar=0) | |
return mu, sigma | |
def marginalize(self, indices): | |
""" | |
Creates a new marginal normal distribution for ''indices''. | |
""" | |
indices = npa(indices) | |
return Normal(len(indices), mu = self.mu[indices], sigma = self.E[ix(indices,indices)], margin = {'indices' : indices}, parent = self) | |
def condition(self, indices, x): | |
""" | |
Creates a new normal distribution conditioned on the data x at indices. | |
""" | |
idim = indices | |
odim = npa([i for i in range(self.dim) if not i in indices]) | |
Aaa = self.A[ix(odim,odim)] | |
Aab = self.A[ix(odim,idim)] | |
iAaa = None | |
det = None | |
if len(odim) == 1: # linalg does not handle d1 arrays | |
iAaa = 1.0 / Aaa | |
det = np.fabs(iAaa[0]) | |
else: | |
iAaa = la.inv(Aaa) | |
det = np.fabs(la.det(iAaa)) | |
# compute the new mu | |
premu = np.dot(iAaa, Aab) | |
mub = self.mu[idim] | |
mua = self.mu[odim] | |
new_mu = mua - np.dot(premu, (x - mub)) | |
new_E = iAaa | |
return Normal(len(odim), mu = new_mu, sigma = new_E, | |
cond = {'data' : x, 'indices' : indices}, | |
parent = self) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment