kcarnold/dpgmm.py

## dpgmm.py
# Dirichlet process Gaussian mixture model

import numpy as np
from scipy.special import gammaln
from scipy.linalg import cholesky
from sliceSample import sliceSample

def multinomialDraw(dist):
    """Returns a single draw from the given multinomial distribution."""
    return np.random.multinomial(1, dist).argmax()


def normpdf(x, mu, var):
    return 1./(np.sqrt(2*np.pi*var))*np.exp(-(x - mu)**2/(2*var))

def lognormPdf(x, mu, var):
    return -.5*np.log(2*np.pi*var) - (x-mu)**2/(2*var)

def logLikelihood(observations, means, precisions, mixProps):
    r = 0.
    for i in xrange(len(mixProps)):
        r += np.log(mixProps[i])+np.sum(lognormPdf(observations, means[i], 1./precisions[i]))
    return r

def rasGamma(shapish, mean):
    """Draw from the Gamma distribution with Rasmussen's parameterization: 2*shape and mean"""
    return np.random.gamma(shapish/2., 2.*mean/shapish)

class DPGMM(object):
    def __init__(self, data, kappa=1.0):
        self.data = np.array(data)
        self.N, self.D = self.data.shape
        self.dataMean = np.mean(self.data)
        self.dataCov = np.cov(self.data)
        self.kappa = float(kappa)

        # Draw initial hyperparameter values
        self.lmb = np.random.multivariate_normal(self.dataMean, self.dataCov)
        self.r = np.random.gamma(self.kappa/2, 2/(self.kappa*self.dataVar))
        self.beta = 1./rasGamma(1, 1)
        self.w = rasGamma(1, self.dataVar)
        self.alpha = 1./rasGamma(1, 1)

        self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
        self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
        self.krep = 0 # number of components actually represented
        self.unusedIndices = set() # class indices that got removed.
        self.componentMeans = np.zeros(0) # mu_j
        self.componentPrecisions = np.zeros(0) # s_j
        self.iter = 0

    def _checkBookkeeping(self):
        krep = 0
        for j, occCount in enumerate(self.occupationCount):
            assert occCount == (self.latentClass == j).sum()
            if occCount > 0: krep += 1
        assert krep == self.krep
        if self.iter:
            assert self.occupationCount.sum() == self.N
        for j in self.unusedIndices:
            assert self.occupationCount[j] == 0
            assert self.componentMeans[j] == 0

    def step(self):
        self.sampleClassAssignments()
        self.sampleComponentParameters()
        self.sampleHyperparameters()
        self.iter += 1

    def sampleClassAssignments(self):
        for i in xrange(self.N):
            obs = self.data[i]
            prevClass = self.latentClass[i]
            if self.iter:
                self.occupationCount[prevClass] -= 1
                assert self.occupationCount[prevClass] >= 0

            classes = []
            weights = []
            def addClass(cls, occCount, obs, componentMean, componentPrecision):
                classes.append(cls)
                weights.append(
                    occCount/(self.N-1+self.alpha)*
                    normpdf(obs, componentMean, 1./componentPrecision))

            # Components with other observations associated
            for j, occCount in enumerate(self.occupationCount):
                if occCount == 0: continue
                addClass(j, occCount, obs, self.componentMeans[j], self.componentPrecisions[j])

            # probability of generating a new class
            if self.iter and self.occupationCount[prevClass] == 0:
                # "Peculiar situation": use the class parameters
                addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentPrecisions[prevClass])
            else:
                # unrepresented class
                # Sample a component mean and precision
                newComponentMean = np.random.normal(self.lmb, 1./np.sqrt(self.r))
                newComponentPrecision = rasGamma(self.beta, 1./self.w)
                addClass(None, self.alpha, obs, newComponentMean, newComponentPrecision)

            # Sample from the categorical distribution
            weights = np.array(weights)
            weights /= weights.sum()
            newClass = classes[multinomialDraw(weights)]

            if newClass is None:
                # Allocate a new class!
                if self.unusedIndices:
                    newClass = self.unusedIndices.pop()
                else:
                    newClass = len(self.occupationCount)
                    self.occupationCount.resize(newClass+1, refcheck=False)
                    self.componentMeans.resize(newClass+1, refcheck=False)
                    self.componentPrecisions.resize(newClass+1, refcheck=False)
                self.occupationCount[newClass] = 0 # will increment later
                self.componentMeans[newClass] = newComponentMean
                self.componentPrecisions[newClass] = newComponentPrecision
                self.krep += 1

            # Put this observation in that class.
            assert newClass not in self.unusedIndices
            self.latentClass[i] = newClass
            self.occupationCount[newClass] += 1
            if self.occupationCount[prevClass] == 0:
                self.componentMeans[prevClass] = self.componentPrecisions[prevClass] = 0
                self.unusedIndices.add(prevClass)
                self.krep -= 1

    def sampleComponentParameters(self):
        for j, occCount in enumerate(self.occupationCount):
            if not occCount: continue
            dataInComponent = self.data[self.latentClass==j]
            assert len(dataInComponent) == occCount
            componentPrecision = self.componentPrecisions[j]
            precision = occCount*componentPrecision + self.r
            self.componentMeans[j] = np.random.normal(
                (dataInComponent.sum()*componentPrecision+self.lmb*self.r)/precision,
                1./np.sqrt(precision))
            self.componentPrecisions[j] = np.random.gamma(
                (self.beta+occCount)/2.,
                2./(self.w*self.beta + ((dataInComponent-self.componentMeans[j])**2).sum()))

    def sampleHyperparameters(self):
        # lambda
        lmbMean = (self.dataMean/self.dataVar + self.r*self.componentMeans.sum())/(1./self.dataVar + self.krep*self.r)
        lmbVar = 1./(1./self.dataVar + self.krep*self.r)
        self.lmb = np.random.normal(lmbMean, np.sqrt(lmbVar))

        # r
        rShape = (self.krep + self.kappa)/2.
        rScale = (self.kappa*self.dataVar+np.sum((self.componentMeans[self.occupationCount != 0]-self.lmb)**2))
        self.r = np.random.gamma(rShape, rScale)

        # w
        wShape = (self.krep*self.beta+1)/2.
        wScale = 2./(self.dataVar+self.beta*self.componentPrecisions.sum()) #unoccupied precisions are 0.
        self.w = np.random.gamma(wShape, wScale)

        # beta
        self.beta = gibbsStep_beta(self.beta, self.componentPrecisions, self.w, self.krep)

        # alpha
        self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)

    def gewekeStep(self):
        # Fantasize data from the current model state
        self.data = np.zeros(self.N)
        for i in xrange(self.N):
            component = self.latentClass[i]
            self.data[i] = np.random.normal(self.componentMeans[component], 1./np.sqrt(self.componentPrecisions[component]))

def gibbsStep_alpha(alpha, k, n):
    def logPAlpha(alpha):
        if 0 < alpha < 100*n:
            return (k-1.5)*np.log(alpha) - 1./(2*alpha) + gammaln(alpha) - gammaln(n+alpha)
        return -np.inf
    return sliceSample(logPAlpha, alpha, w=1)

def gibbsStep_beta(beta, componentPrecisions, w, krep):
    precisions = componentPrecisions[componentPrecisions.nonzero()]
    def logPBeta(beta):
        if 0 < beta:
            b2 = beta/2.
            wp = w*precisions
            return -krep*gammaln(b2) - 1./(2.*beta) + (krep*beta-3.)/2.*np.log(b2) + (b2*np.log(wp)-b2*wp).sum()
        return -np.inf
    return sliceSample(logPBeta, beta, w=100)

def crpDraw(alpha, N):
    """Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
    occ = []
    for i in xrange(N):
        u = np.random.rand() * (alpha+i)
        if u < alpha:
            # new table
            occ.append(1)
        else:
            # u ~ U(alpha, alpha+i)
            u -= alpha
            assert u<i
            assert sum(occ) == i
            for table, n in enumerate(occ):
                if u < n:
                    occ[table] += 1
                    break
                u -= n
    return occ

def validateAlphaPosterior(nCustomers, nIter):
    alphaSamples = np.zeros(nIter)
    alpha = 1.0
    for i in xrange(nIter):
        d = crpDraw(alpha, nCustomers)
        alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
        alphaSamples[i] = alpha
    return alphaSamples

def gewekeComponentParameters(N):
    model = DPGMM(np.random.normal(0, 1, 500))
    model.latentClass[:250] = 1
    model.occupationCount = np.array([250, 250]).astype(np.uint32)
    model.lmb = 0.
    model.r = 1.
    model.componentMeans.resize(2)
    model.componentPrecisions.resize(2)
    model.componentMeans.fill(model.lmb)
    model.componentPrecisions.fill(model.r)
    meanSamples = np.zeros(N*2)
    precisionSamples = np.zeros(N*2)
    for i in xrange(N):
        model.gewekeStep()
        model.sampleComponentParameters()
        meanSamples[2*i:2*i+2] = model.componentMeans
        precisionSamples[2*i:2*i+2] = model.componentPrecisions
    return model, meanSamples, precisionSamples

def gewekeTest(N=1000):
    model = DPGMM(np.random.normal(0, 1, 1))
    alphaSamples = np.zeros(N)
    for i in xrange(N):
        print i
        model.step()
        model.gewekeStep()
        #model._checkBookkeeping()
        alphaSamples[i] = model.alpha
    return alphaSamples

def testSingleComponent():
    data = np.random.normal(0, 1, 500)
    model = DPGMM(data)
    model._checkBookkeeping()
    model.step()
    model._checkBookkeeping()
    return model

def testTwoComponents():
    data = np.r_[np.random.normal(0, 1, 250),
                 np.random.normal(10, 2, 250)]
    model = DPGMM(data)
    model._checkBookkeeping()
    model.step()
    model._checkBookkeeping()
    print "Occ:", model.occupationCount
    print "Means:", model.componentMeans
    print "Precisions:", model.componentPrecisions
    return model


def wishartDraw(df, V):
    # stupid way
    #X = np.random.multivariate_normal(np.zeros(len(V)), V, n)
    #return np.dot(X.T, X)

    # Bartlett decomposition
    n = len(V)
    L = cholesky(V) # lower-triangular factor
    A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
    A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
    X = np.dot(L, A)
    return np.dot(X, X.T)


## dpgmm2.py
# Dirichlet process Gaussian mixture model.
# This one uses Normal-Wishart priors, then everything comes out better.

import numpy as np
from scipy.special import gammaln
import scipy.linalg as LA
from sliceSample import sliceSample

def covForKnownMean(x, mean):
    q = x - mean
    return q.T.dot(q)

def multinomialDraw(dist):
    """Returns a single draw from the given multinomial distribution."""
    return np.random.multinomial(1, dist).argmax()

def mvpdf(obs, mean, invCov, det):
    # Forget the (2pi)^(d/2) factor.
    q = obs-mean
    return det * np.exp(-q.T.dot(invCov).dot(q)/2.)

def wishartDraw(df, V):
    # Bartlett decomposition
    n = len(V)
    L = LA.cholesky(V) # lower-triangular factor
    A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
    A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
    X = np.dot(L, A)
    return np.dot(X, X.T)

def normalWishartDraw(mu, kappa, df, cov):
    D = len(mu)
    precision = wishartDraw(df, LA.inv(cov)) # DUDE FIXME.
    mean = LA.solve_triangular(LA.cholesky(kappa * precision), np.random.standard_normal(D),
                               lower=True, overwrite_b=True) + mu
    return mean, precision

def normalWishartPosteriorDraw(mu0, kappa, df, cov, n, xbar, sampleCovariance):
    kappa_n = kappa + n
    mu_n = (kappa*mu0 + n*xbar) / kappa_n
    Tn = cov + sampleCovariance + kappa*n / kappa_n *covForKnownMean(mu0, xbar)
    df_n = df + n
    return normalWishartDraw(mu_n, kappa_n, df_n, Tn)


class DPGMM(object):
    def __init__(self, data, mu0, kappa, df, cov):
        self.data = np.array(data)
        self.N, self.D = self.data.shape
        self.mu0 = mu0
        self.kappa = kappa
        self.df = df
        self.cov = cov
        self.cov_chol = LA.cholesky(self.cov) # lower-triangular factor

        # Draw initial hyperparameter values
        self.alpha = 1./np.random.gamma(.5, 2)

        self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
        self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
        self.krep = 0 # number of components actually represented
        self.unusedIndices = set() # class indices that got removed.
        self.componentMeans = np.zeros((0, self.D)) # mu_j
        self.componentInvCovs = np.zeros((0, self.D, self.D)) # s_j
        self.precisionDets = np.zeros(0)
        self.iter = 0

    def _checkBookkeeping(self):
        krep = 0
        for j, occCount in enumerate(self.occupationCount):
            assert occCount == (self.latentClass == j).sum()
            if occCount > 0: krep += 1
        assert krep == self.krep
        if self.iter:
            assert self.occupationCount.sum() == self.N
        for j in self.unusedIndices:
            assert self.occupationCount[j] == 0
            assert self.componentMeans[j] == 0

    def step(self):
        self.sampleClassAssignments()
        self.sampleComponentParameters()
        self.sampleHyperparameters()
        self.iter += 1

    def sampleClassAssignments(self):
        for i in xrange(self.N):
            obs = self.data[i]
            prevClass = self.latentClass[i]
            if self.iter:
                self.occupationCount[prevClass] -= 1
                assert self.occupationCount[prevClass] >= 0

            classes = []
            weights = []
            def addClass(cls, occCount, obs, componentMean, componentInvCov, det):
                classes.append(cls)
                weights.append(
                    occCount/(self.N-1+self.alpha)* # prior
                    mvpdf(obs, componentMean, componentInvCov, det))

            # Components with other observations associated
            for j, occCount in enumerate(self.occupationCount):
                if occCount == 0: continue
                addClass(j, occCount, obs, self.componentMeans[j], self.componentInvCovs[j],
                         self.precisionDets[j])

            # probability of generating a new class
            if self.iter and self.occupationCount[prevClass] == 0:
                # "Peculiar situation": use the class parameters
                addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentInvCovs[prevClass], self.precisionDets[j])
            else:
                # unrepresented class
                # Sample a component inv-covariance and mean from the normal-Wishart
                newComponentMean, newComponentInvCov = normalWishartDraw(self.mu0, self.kappa, self.df, self.cov)
                addClass(None, self.alpha, obs, newComponentMean, newComponentInvCov, LA.det(newComponentInvCov))

            # Sample from the categorical distribution
            weights = np.array(weights)
            weights /= weights.sum()
            newClass = classes[multinomialDraw(weights)]

            if newClass is None:
                # Allocate a new class!
                if self.unusedIndices:
                    newClass = self.unusedIndices.pop()
                else:
                    newClass = len(self.occupationCount)
                    self.occupationCount.resize(newClass+1, refcheck=False)
                    self.componentMeans.resize(newClass+1, self.D, refcheck=False)
                    self.componentInvCovs.resize(newClass+1, self.D, self.D, refcheck=False)
                    self.precisionDets.resize(newClass+1, refcheck=False)
                self.occupationCount[newClass] = 0 # will increment later
                self.componentMeans[newClass] = newComponentMean
                self.componentInvCovs[newClass] = newComponentInvCov
                self.precisionDets[newClass] = LA.det(newComponentInvCov)
                self.krep += 1

            # Put this observation in that class.
            assert newClass not in self.unusedIndices
            self.latentClass[i] = newClass
            self.occupationCount[newClass] += 1
            if self.occupationCount[prevClass] == 0:
                self.componentMeans[prevClass] = self.componentInvCovs[prevClass] = self.precisionDets[prevClass] = 0
                self.unusedIndices.add(prevClass)
                self.krep -= 1

    def sampleComponentParameters(self):
        for j, occCount in enumerate(self.occupationCount):
            if not occCount: continue
            dataInComponent = self.data[self.latentClass==j]
            assert len(dataInComponent) == occCount
            mean = dataInComponent.mean(axis=0)
            S = covForKnownMean(dataInComponent, mean) # sample covariance of data in component
            self.componentMeans[j], self.componentInvCovs[j] = normalWishartPosteriorDraw(
                self.mu0, self.kappa, self.df, self.cov, occCount, mean, S)
            self.precisionDets[j] = LA.det(self.componentInvCovs[j])

    def sampleHyperparameters(self):
        # alpha
        self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)

    def showState(self):
        from matplotlib import pyplot as plt
        plt.plot(self.data[:,0], self.data[:,1], '+', color='black')
        colors = plt.cm.rainbow(np.linspace(0,1,10))
        for i, j in enumerate(np.argsort(self.occupationCount)[-10:]):
            if self.occupationCount[j] == 0: continue
            plotEllipse(self.componentMeans[j], self.componentInvCovs[j], color=colors[i])
            data = self.data[self.latentClass == j]
            plt.plot(data[:,0], data[:,1], color=colors[i], linestyle='None', marker='+')

    def gewekeStep(self):
        # Fantasize data from the current model state
        self.data = np.zeros(self.N)
        chols = {}
        for j, occCount in enumerate(self.occupationCount):
            if not occCount: continue
            chols[j] = LA.cholesky(self.componentInvCovs[j])
        for i in xrange(self.N):
            component = self.latentClass[i]
            self.data[i] = chols[component].dot(np.random.standard_normal(self.D)) + self.componentMeans[component]

def gibbsStep_alpha(alpha, k, n):
    def logPAlpha(alpha):
        if 0 < alpha < 100*n:
            return (k-1.5)*np.log(alpha) - 1./(2*alpha) + gammaln(alpha) - gammaln(n+alpha)
        return -np.inf
    return sliceSample(logPAlpha, alpha, w=1)

def crpDraw(alpha, N):
    """Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
    occ = []
    for i in xrange(N):
        u = np.random.rand() * (alpha+i)
        if u < alpha:
            # new table
            occ.append(1)
        else:
            # u ~ U(alpha, alpha+i)
            u -= alpha
            assert u<i
            assert sum(occ) == i
            for table, n in enumerate(occ):
                if u < n:
                    occ[table] += 1
                    break
                u -= n
    return occ

def validateAlphaPosterior(nCustomers, nIter):
    alphaSamples = np.zeros(nIter)
    alpha = 1.0
    for i in xrange(nIter):
        d = crpDraw(alpha, nCustomers)
        alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
        alphaSamples[i] = alpha
    return alphaSamples

def gewekeComponentParameters(N):
    model = DPGMM(np.random.normal(0, 1, 500))
    model.latentClass[:250] = 1
    model.occupationCount = np.array([250, 250]).astype(np.uint32)
    model.lmb = 0.
    model.r = 1.
    model.componentMeans.resize(2)
    model.componentInvCovs.resize(2)
    model.componentMeans.fill(model.lmb)
    model.componentInvCovs.fill(model.r)
    meanSamples = np.zeros(N*2)
    precisionSamples = np.zeros(N*2)
    for i in xrange(N):
        model.gewekeStep()
        model.sampleComponentParameters()
        meanSamples[2*i:2*i+2] = model.componentMeans
        precisionSamples[2*i:2*i+2] = model.componentInvCovs
    return model, meanSamples, precisionSamples

def gewekeTest(N=1000):
    model = DPGMM(np.random.normal(0, 1, 1))
    alphaSamples = np.zeros(N)
    for i in xrange(N):
        print i
        model.step()
        model.gewekeStep()
        #model._checkBookkeeping()
        alphaSamples[i] = model.alpha
    return alphaSamples

def testSingleComponent():
    data = np.random.normal(0, 1, 500)
    model = DPGMM(data)
    model._checkBookkeeping()
    model.step()
    model._checkBookkeeping()
    return model

def testTwoComponents():
    data = np.r_[np.random.normal(0, 1, 250),
                 np.random.normal(10, 2, 250)]
    model = DPGMM(data)
    model._checkBookkeeping()
    model.step()
    model._checkBookkeeping()
    print "Occ:", model.occupationCount
    print "Means:", model.componentMeans
    print "Precisions:", model.componentInvCovs
    return model

def plotEllipse(mean, prec, **kwargs):
    from matplotlib import pyplot as plt
    vals, vecs = LA.eig(prec, right=True)
    vals = np.sqrt(1./np.real(vals))
    theta = np.linspace(0, 2*np.pi, 100)
    points = vals[0]*vecs[0]*np.sin(theta)[:,np.newaxis] + vals[1]*vecs[1]*np.cos(theta)[:,np.newaxis]
    points += mean
    return plt.plot(points[:,0], points[:,1], **kwargs)

## sliceSample.py
import numpy as np

eps = 1e-10

def sliceSample(logpdf, x0, w=1.0, positive=False):
    pp = logpdf(x0)
    z = pp - np.random.exponential(1.0)
    # Sample from the slice -- logpdf(x)<z
    # - pick an initial interval of width w, containing x0
    L = x0 - np.random.rand()*w
    if positive and L<0: L=eps
    R = L+w
    # - step out (no size limit)
    while True:
        if z < logpdf(L):
            L -= w
            if positive and L<0:
                L=eps
                break
        else:
            break
    while True:
        if z < logpdf(R):
            R += w
        else:
            break
    #print "Stepped out to L={}, R={}, pdf={}:{}".format(L, R, pdf(L), pdf(R))
    # - sample from the part of the slice within (L, R)
    while True:
        x1 = L + np.random.rand()*(R-L)
        pp = logpdf(x1)
        #print '[{},{}] Sampled x1={}, pdf(state)={}'.format(L, R, x1, pp)
        if z < pp:
            break
        else:
            if x1 < x0:
                L = x1
            else:
                R = x1;
    return x1

def testSliceSample():
    N = 10000
    samples = np.zeros(N)
    x = 0
    mu = 0.; var = 1.
    def logpdf(x):
        return -.5*np.log(2*np.pi*var) - (x-mu)**2/(2*var)
    for i in xrange(N):
        x = sliceSample(logpdf, x, positive=True)
        samples[i] = x
    return samples
	# Dirichlet process Gaussian mixture model

	import numpy as np
	from scipy.special import gammaln
	from scipy.linalg import cholesky
	from sliceSample import sliceSample

	def multinomialDraw(dist):
	"""Returns a single draw from the given multinomial distribution."""
	return np.random.multinomial(1, dist).argmax()


	def normpdf(x, mu, var):
	return 1./(np.sqrt(2np.pivar))np.exp(-(x - mu)2/(2var))

	def lognormPdf(x, mu, var):
	return -.5np.log(2np.pivar) - (x-mu)2/(2var)

	def logLikelihood(observations, means, precisions, mixProps):
	r = 0.
	for i in xrange(len(mixProps)):
	r += np.log(mixProps[i])+np.sum(lognormPdf(observations, means[i], 1./precisions[i]))
	return r

	def rasGamma(shapish, mean):
	"""Draw from the Gamma distribution with Rasmussen's parameterization: 2*shape and mean"""
	return np.random.gamma(shapish/2., 2.*mean/shapish)

	class DPGMM(object):
	def __init__(self, data, kappa=1.0):
	self.data = np.array(data)
	self.N, self.D = self.data.shape
	self.dataMean = np.mean(self.data)
	self.dataCov = np.cov(self.data)
	self.kappa = float(kappa)

	# Draw initial hyperparameter values
	self.lmb = np.random.multivariate_normal(self.dataMean, self.dataCov)
	self.r = np.random.gamma(self.kappa/2, 2/(self.kappa*self.dataVar))
	self.beta = 1./rasGamma(1, 1)
	self.w = rasGamma(1, self.dataVar)
	self.alpha = 1./rasGamma(1, 1)

	self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
	self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
	self.krep = 0 # number of components actually represented
	self.unusedIndices = set() # class indices that got removed.
	self.componentMeans = np.zeros(0) # mu_j
	self.componentPrecisions = np.zeros(0) # s_j
	self.iter = 0

	def _checkBookkeeping(self):
	krep = 0
	for j, occCount in enumerate(self.occupationCount):
	assert occCount == (self.latentClass == j).sum()
	if occCount > 0: krep += 1
	assert krep == self.krep
	if self.iter:
	assert self.occupationCount.sum() == self.N
	for j in self.unusedIndices:
	assert self.occupationCount[j] == 0
	assert self.componentMeans[j] == 0

	def step(self):
	self.sampleClassAssignments()
	self.sampleComponentParameters()
	self.sampleHyperparameters()
	self.iter += 1

	def sampleClassAssignments(self):
	for i in xrange(self.N):
	obs = self.data[i]
	prevClass = self.latentClass[i]
	if self.iter:
	self.occupationCount[prevClass] -= 1
	assert self.occupationCount[prevClass] >= 0

	classes = []
	weights = []
	def addClass(cls, occCount, obs, componentMean, componentPrecision):
	classes.append(cls)
	weights.append(
	occCount/(self.N-1+self.alpha)*
	normpdf(obs, componentMean, 1./componentPrecision))

	# Components with other observations associated
	for j, occCount in enumerate(self.occupationCount):
	if occCount == 0: continue
	addClass(j, occCount, obs, self.componentMeans[j], self.componentPrecisions[j])

	# probability of generating a new class
	if self.iter and self.occupationCount[prevClass] == 0:
	# "Peculiar situation": use the class parameters
	addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentPrecisions[prevClass])
	else:
	# unrepresented class
	# Sample a component mean and precision
	newComponentMean = np.random.normal(self.lmb, 1./np.sqrt(self.r))
	newComponentPrecision = rasGamma(self.beta, 1./self.w)
	addClass(None, self.alpha, obs, newComponentMean, newComponentPrecision)

	# Sample from the categorical distribution
	weights = np.array(weights)
	weights /= weights.sum()
	newClass = classes[multinomialDraw(weights)]

	if newClass is None:
	# Allocate a new class!
	if self.unusedIndices:
	newClass = self.unusedIndices.pop()
	else:
	newClass = len(self.occupationCount)
	self.occupationCount.resize(newClass+1, refcheck=False)
	self.componentMeans.resize(newClass+1, refcheck=False)
	self.componentPrecisions.resize(newClass+1, refcheck=False)
	self.occupationCount[newClass] = 0 # will increment later
	self.componentMeans[newClass] = newComponentMean
	self.componentPrecisions[newClass] = newComponentPrecision
	self.krep += 1

	# Put this observation in that class.
	assert newClass not in self.unusedIndices
	self.latentClass[i] = newClass
	self.occupationCount[newClass] += 1
	if self.occupationCount[prevClass] == 0:
	self.componentMeans[prevClass] = self.componentPrecisions[prevClass] = 0
	self.unusedIndices.add(prevClass)
	self.krep -= 1

	def sampleComponentParameters(self):
	for j, occCount in enumerate(self.occupationCount):
	if not occCount: continue
	dataInComponent = self.data[self.latentClass==j]
	assert len(dataInComponent) == occCount
	componentPrecision = self.componentPrecisions[j]
	precision = occCount*componentPrecision + self.r
	self.componentMeans[j] = np.random.normal(
	(dataInComponent.sum()componentPrecision+self.lmbself.r)/precision,
	1./np.sqrt(precision))
	self.componentPrecisions[j] = np.random.gamma(
	(self.beta+occCount)/2.,
	2./(self.wself.beta + ((dataInComponent-self.componentMeans[j])*2).sum()))

	def sampleHyperparameters(self):
	# lambda
	lmbMean = (self.dataMean/self.dataVar + self.rself.componentMeans.sum())/(1./self.dataVar + self.krepself.r)
	lmbVar = 1./(1./self.dataVar + self.krep*self.r)
	self.lmb = np.random.normal(lmbMean, np.sqrt(lmbVar))

	# r
	rShape = (self.krep + self.kappa)/2.
	rScale = (self.kappaself.dataVar+np.sum((self.componentMeans[self.occupationCount != 0]-self.lmb)*2))
	self.r = np.random.gamma(rShape, rScale)

	# w
	wShape = (self.krep*self.beta+1)/2.
	wScale = 2./(self.dataVar+self.beta*self.componentPrecisions.sum()) #unoccupied precisions are 0.
	self.w = np.random.gamma(wShape, wScale)

	# beta
	self.beta = gibbsStep_beta(self.beta, self.componentPrecisions, self.w, self.krep)

	# alpha
	self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)

	def gewekeStep(self):
	# Fantasize data from the current model state
	self.data = np.zeros(self.N)
	for i in xrange(self.N):
	component = self.latentClass[i]
	self.data[i] = np.random.normal(self.componentMeans[component], 1./np.sqrt(self.componentPrecisions[component]))

	def gibbsStep_alpha(alpha, k, n):
	def logPAlpha(alpha):
	if 0 < alpha < 100*n:
	return (k-1.5)np.log(alpha) - 1./(2alpha) + gammaln(alpha) - gammaln(n+alpha)
	return -np.inf
	return sliceSample(logPAlpha, alpha, w=1)

	def gibbsStep_beta(beta, componentPrecisions, w, krep):
	precisions = componentPrecisions[componentPrecisions.nonzero()]
	def logPBeta(beta):
	if 0 < beta:
	b2 = beta/2.
	wp = w*precisions
	return -krepgammaln(b2) - 1./(2.beta) + (krepbeta-3.)/2.np.log(b2) + (b2np.log(wp)-b2wp).sum()
	return -np.inf
	return sliceSample(logPBeta, beta, w=100)

	def crpDraw(alpha, N):
	"""Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
	occ = []
	for i in xrange(N):
	u = np.random.rand() * (alpha+i)
	if u < alpha:
	# new table
	occ.append(1)
	else:
	# u ~ U(alpha, alpha+i)
	u -= alpha
	assert u<i
	assert sum(occ) == i
	for table, n in enumerate(occ):
	if u < n:
	occ[table] += 1
	break
	u -= n
	return occ

	def validateAlphaPosterior(nCustomers, nIter):
	alphaSamples = np.zeros(nIter)
	alpha = 1.0
	for i in xrange(nIter):
	d = crpDraw(alpha, nCustomers)
	alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
	alphaSamples[i] = alpha
	return alphaSamples

	def gewekeComponentParameters(N):
	model = DPGMM(np.random.normal(0, 1, 500))
	model.latentClass[:250] = 1
	model.occupationCount = np.array([250, 250]).astype(np.uint32)
	model.lmb = 0.
	model.r = 1.
	model.componentMeans.resize(2)
	model.componentPrecisions.resize(2)
	model.componentMeans.fill(model.lmb)
	model.componentPrecisions.fill(model.r)
	meanSamples = np.zeros(N*2)
	precisionSamples = np.zeros(N*2)
	for i in xrange(N):
	model.gewekeStep()
	model.sampleComponentParameters()
	meanSamples[2i:2i+2] = model.componentMeans
	precisionSamples[2i:2i+2] = model.componentPrecisions
	return model, meanSamples, precisionSamples

	def gewekeTest(N=1000):
	model = DPGMM(np.random.normal(0, 1, 1))
	alphaSamples = np.zeros(N)
	for i in xrange(N):
	print i
	model.step()
	model.gewekeStep()
	#model._checkBookkeeping()
	alphaSamples[i] = model.alpha
	return alphaSamples

	def testSingleComponent():
	data = np.random.normal(0, 1, 500)
	model = DPGMM(data)
	model._checkBookkeeping()
	model.step()
	model._checkBookkeeping()
	return model

	def testTwoComponents():
	data = np.r_[np.random.normal(0, 1, 250),
	np.random.normal(10, 2, 250)]
	model = DPGMM(data)
	model._checkBookkeeping()
	model.step()
	model._checkBookkeeping()
	print "Occ:", model.occupationCount
	print "Means:", model.componentMeans
	print "Precisions:", model.componentPrecisions
	return model


	def wishartDraw(df, V):
	# stupid way
	#X = np.random.multivariate_normal(np.zeros(len(V)), V, n)
	#return np.dot(X.T, X)

	# Bartlett decomposition
	n = len(V)
	L = cholesky(V) # lower-triangular factor
	A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
	A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
	X = np.dot(L, A)
	return np.dot(X, X.T)
	# Dirichlet process Gaussian mixture model.
	# This one uses Normal-Wishart priors, then everything comes out better.

	import numpy as np
	from scipy.special import gammaln
	import scipy.linalg as LA
	from sliceSample import sliceSample

	def covForKnownMean(x, mean):
	q = x - mean
	return q.T.dot(q)

	def multinomialDraw(dist):
	"""Returns a single draw from the given multinomial distribution."""
	return np.random.multinomial(1, dist).argmax()

	def mvpdf(obs, mean, invCov, det):
	# Forget the (2pi)^(d/2) factor.
	q = obs-mean
	return det * np.exp(-q.T.dot(invCov).dot(q)/2.)

	def wishartDraw(df, V):
	# Bartlett decomposition
	n = len(V)
	L = LA.cholesky(V) # lower-triangular factor
	A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
	A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
	X = np.dot(L, A)
	return np.dot(X, X.T)

	def normalWishartDraw(mu, kappa, df, cov):
	D = len(mu)
	precision = wishartDraw(df, LA.inv(cov)) # DUDE FIXME.
	mean = LA.solve_triangular(LA.cholesky(kappa * precision), np.random.standard_normal(D),
	lower=True, overwrite_b=True) + mu
	return mean, precision

	def normalWishartPosteriorDraw(mu0, kappa, df, cov, n, xbar, sampleCovariance):
	kappa_n = kappa + n
	mu_n = (kappamu0 + nxbar) / kappa_n
	Tn = cov + sampleCovariance + kappan / kappa_n covForKnownMean(mu0, xbar)
	df_n = df + n
	return normalWishartDraw(mu_n, kappa_n, df_n, Tn)


	class DPGMM(object):
	def __init__(self, data, mu0, kappa, df, cov):
	self.data = np.array(data)
	self.N, self.D = self.data.shape
	self.mu0 = mu0
	self.kappa = kappa
	self.df = df
	self.cov = cov
	self.cov_chol = LA.cholesky(self.cov) # lower-triangular factor

	# Draw initial hyperparameter values
	self.alpha = 1./np.random.gamma(.5, 2)

	self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
	self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
	self.krep = 0 # number of components actually represented
	self.unusedIndices = set() # class indices that got removed.
	self.componentMeans = np.zeros((0, self.D)) # mu_j
	self.componentInvCovs = np.zeros((0, self.D, self.D)) # s_j
	self.precisionDets = np.zeros(0)
	self.iter = 0

	def _checkBookkeeping(self):
	krep = 0
	for j, occCount in enumerate(self.occupationCount):
	assert occCount == (self.latentClass == j).sum()
	if occCount > 0: krep += 1
	assert krep == self.krep
	if self.iter:
	assert self.occupationCount.sum() == self.N
	for j in self.unusedIndices:
	assert self.occupationCount[j] == 0
	assert self.componentMeans[j] == 0

	def step(self):
	self.sampleClassAssignments()
	self.sampleComponentParameters()
	self.sampleHyperparameters()
	self.iter += 1

	def sampleClassAssignments(self):
	for i in xrange(self.N):
	obs = self.data[i]
	prevClass = self.latentClass[i]
	if self.iter:
	self.occupationCount[prevClass] -= 1
	assert self.occupationCount[prevClass] >= 0

	classes = []
	weights = []
	def addClass(cls, occCount, obs, componentMean, componentInvCov, det):
	classes.append(cls)
	weights.append(
	occCount/(self.N-1+self.alpha)* # prior
	mvpdf(obs, componentMean, componentInvCov, det))

	# Components with other observations associated
	for j, occCount in enumerate(self.occupationCount):
	if occCount == 0: continue
	addClass(j, occCount, obs, self.componentMeans[j], self.componentInvCovs[j],
	self.precisionDets[j])

	# probability of generating a new class
	if self.iter and self.occupationCount[prevClass] == 0:
	# "Peculiar situation": use the class parameters
	addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentInvCovs[prevClass], self.precisionDets[j])
	else:
	# unrepresented class
	# Sample a component inv-covariance and mean from the normal-Wishart
	newComponentMean, newComponentInvCov = normalWishartDraw(self.mu0, self.kappa, self.df, self.cov)
	addClass(None, self.alpha, obs, newComponentMean, newComponentInvCov, LA.det(newComponentInvCov))

	# Sample from the categorical distribution
	weights = np.array(weights)
	weights /= weights.sum()
	newClass = classes[multinomialDraw(weights)]

	if newClass is None:
	# Allocate a new class!
	if self.unusedIndices:
	newClass = self.unusedIndices.pop()
	else:
	newClass = len(self.occupationCount)
	self.occupationCount.resize(newClass+1, refcheck=False)
	self.componentMeans.resize(newClass+1, self.D, refcheck=False)
	self.componentInvCovs.resize(newClass+1, self.D, self.D, refcheck=False)
	self.precisionDets.resize(newClass+1, refcheck=False)
	self.occupationCount[newClass] = 0 # will increment later
	self.componentMeans[newClass] = newComponentMean
	self.componentInvCovs[newClass] = newComponentInvCov
	self.precisionDets[newClass] = LA.det(newComponentInvCov)
	self.krep += 1

	# Put this observation in that class.
	assert newClass not in self.unusedIndices
	self.latentClass[i] = newClass
	self.occupationCount[newClass] += 1
	if self.occupationCount[prevClass] == 0:
	self.componentMeans[prevClass] = self.componentInvCovs[prevClass] = self.precisionDets[prevClass] = 0
	self.unusedIndices.add(prevClass)
	self.krep -= 1

	def sampleComponentParameters(self):
	for j, occCount in enumerate(self.occupationCount):
	if not occCount: continue
	dataInComponent = self.data[self.latentClass==j]
	assert len(dataInComponent) == occCount
	mean = dataInComponent.mean(axis=0)
	S = covForKnownMean(dataInComponent, mean) # sample covariance of data in component
	self.componentMeans[j], self.componentInvCovs[j] = normalWishartPosteriorDraw(
	self.mu0, self.kappa, self.df, self.cov, occCount, mean, S)
	self.precisionDets[j] = LA.det(self.componentInvCovs[j])

	def sampleHyperparameters(self):
	# alpha
	self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)

	def showState(self):
	from matplotlib import pyplot as plt
	plt.plot(self.data[:,0], self.data[:,1], '+', color='black')
	colors = plt.cm.rainbow(np.linspace(0,1,10))
	for i, j in enumerate(np.argsort(self.occupationCount)[-10:]):
	if self.occupationCount[j] == 0: continue
	plotEllipse(self.componentMeans[j], self.componentInvCovs[j], color=colors[i])
	data = self.data[self.latentClass == j]
	plt.plot(data[:,0], data[:,1], color=colors[i], linestyle='None', marker='+')

	def gewekeStep(self):
	# Fantasize data from the current model state
	self.data = np.zeros(self.N)
	chols = {}
	for j, occCount in enumerate(self.occupationCount):
	if not occCount: continue
	chols[j] = LA.cholesky(self.componentInvCovs[j])
	for i in xrange(self.N):
	component = self.latentClass[i]
	self.data[i] = chols[component].dot(np.random.standard_normal(self.D)) + self.componentMeans[component]

	def gibbsStep_alpha(alpha, k, n):
	def logPAlpha(alpha):
	if 0 < alpha < 100*n:
	return (k-1.5)np.log(alpha) - 1./(2alpha) + gammaln(alpha) - gammaln(n+alpha)
	return -np.inf
	return sliceSample(logPAlpha, alpha, w=1)

	def crpDraw(alpha, N):
	"""Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
	occ = []
	for i in xrange(N):
	u = np.random.rand() * (alpha+i)
	if u < alpha:
	# new table
	occ.append(1)
	else:
	# u ~ U(alpha, alpha+i)
	u -= alpha
	assert u<i
	assert sum(occ) == i
	for table, n in enumerate(occ):
	if u < n:
	occ[table] += 1
	break
	u -= n
	return occ

	def validateAlphaPosterior(nCustomers, nIter):
	alphaSamples = np.zeros(nIter)
	alpha = 1.0
	for i in xrange(nIter):
	d = crpDraw(alpha, nCustomers)
	alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
	alphaSamples[i] = alpha
	return alphaSamples

	def gewekeComponentParameters(N):
	model = DPGMM(np.random.normal(0, 1, 500))
	model.latentClass[:250] = 1
	model.occupationCount = np.array([250, 250]).astype(np.uint32)
	model.lmb = 0.
	model.r = 1.
	model.componentMeans.resize(2)
	model.componentInvCovs.resize(2)
	model.componentMeans.fill(model.lmb)
	model.componentInvCovs.fill(model.r)
	meanSamples = np.zeros(N*2)
	precisionSamples = np.zeros(N*2)
	for i in xrange(N):
	model.gewekeStep()
	model.sampleComponentParameters()
	meanSamples[2i:2i+2] = model.componentMeans
	precisionSamples[2i:2i+2] = model.componentInvCovs
	return model, meanSamples, precisionSamples

	def gewekeTest(N=1000):
	model = DPGMM(np.random.normal(0, 1, 1))
	alphaSamples = np.zeros(N)
	for i in xrange(N):
	print i
	model.step()
	model.gewekeStep()
	#model._checkBookkeeping()
	alphaSamples[i] = model.alpha
	return alphaSamples

	def testSingleComponent():
	data = np.random.normal(0, 1, 500)
	model = DPGMM(data)
	model._checkBookkeeping()
	model.step()
	model._checkBookkeeping()
	return model

	def testTwoComponents():
	data = np.r_[np.random.normal(0, 1, 250),
	np.random.normal(10, 2, 250)]
	model = DPGMM(data)
	model._checkBookkeeping()
	model.step()
	model._checkBookkeeping()
	print "Occ:", model.occupationCount
	print "Means:", model.componentMeans
	print "Precisions:", model.componentInvCovs
	return model

	def plotEllipse(mean, prec, **kwargs):
	from matplotlib import pyplot as plt
	vals, vecs = LA.eig(prec, right=True)
	vals = np.sqrt(1./np.real(vals))
	theta = np.linspace(0, 2*np.pi, 100)
	points = vals[0]vecs[0]np.sin(theta)[:,np.newaxis] + vals[1]vecs[1]np.cos(theta)[:,np.newaxis]
	points += mean
	return plt.plot(points[:,0], points[:,1], **kwargs)
	import numpy as np

	eps = 1e-10

	def sliceSample(logpdf, x0, w=1.0, positive=False):
	pp = logpdf(x0)
	z = pp - np.random.exponential(1.0)
	# Sample from the slice -- logpdf(x)<z
	# - pick an initial interval of width w, containing x0
	L = x0 - np.random.rand()*w
	if positive and L<0: L=eps
	R = L+w
	# - step out (no size limit)
	while True:
	if z < logpdf(L):
	L -= w
	if positive and L<0:
	L=eps
	break
	else:
	break
	while True:
	if z < logpdf(R):
	R += w
	else:
	break
	#print "Stepped out to L={}, R={}, pdf={}:{}".format(L, R, pdf(L), pdf(R))
	# - sample from the part of the slice within (L, R)
	while True:
	x1 = L + np.random.rand()*(R-L)
	pp = logpdf(x1)
	#print '[{},{}] Sampled x1={}, pdf(state)={}'.format(L, R, x1, pp)
	if z < pp:
	break
	else:
	if x1 < x0:
	L = x1
	else:
	R = x1;
	return x1

	def testSliceSample():
	N = 10000
	samples = np.zeros(N)
	x = 0
	mu = 0.; var = 1.
	def logpdf(x):
	return -.5np.log(2np.pivar) - (x-mu)2/(2var)
	for i in xrange(N):
	x = sliceSample(logpdf, x, positive=True)
	samples[i] = x
	return samples