Skip to content

Instantly share code, notes, and snippets.

@kcarnold
Created November 17, 2011 16:25
Show Gist options
  • Save kcarnold/1373604 to your computer and use it in GitHub Desktop.
Save kcarnold/1373604 to your computer and use it in GitHub Desktop.
DPGMM sampler
# Dirichlet process Gaussian mixture model
import numpy as np
from scipy.special import gammaln
from scipy.linalg import cholesky
from sliceSample import sliceSample
def multinomialDraw(dist):
"""Returns a single draw from the given multinomial distribution."""
return np.random.multinomial(1, dist).argmax()
def normpdf(x, mu, var):
return 1./(np.sqrt(2*np.pi*var))*np.exp(-(x - mu)**2/(2*var))
def lognormPdf(x, mu, var):
return -.5*np.log(2*np.pi*var) - (x-mu)**2/(2*var)
def logLikelihood(observations, means, precisions, mixProps):
r = 0.
for i in xrange(len(mixProps)):
r += np.log(mixProps[i])+np.sum(lognormPdf(observations, means[i], 1./precisions[i]))
return r
def rasGamma(shapish, mean):
"""Draw from the Gamma distribution with Rasmussen's parameterization: 2*shape and mean"""
return np.random.gamma(shapish/2., 2.*mean/shapish)
class DPGMM(object):
def __init__(self, data, kappa=1.0):
self.data = np.array(data)
self.N, self.D = self.data.shape
self.dataMean = np.mean(self.data)
self.dataCov = np.cov(self.data)
self.kappa = float(kappa)
# Draw initial hyperparameter values
self.lmb = np.random.multivariate_normal(self.dataMean, self.dataCov)
self.r = np.random.gamma(self.kappa/2, 2/(self.kappa*self.dataVar))
self.beta = 1./rasGamma(1, 1)
self.w = rasGamma(1, self.dataVar)
self.alpha = 1./rasGamma(1, 1)
self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
self.krep = 0 # number of components actually represented
self.unusedIndices = set() # class indices that got removed.
self.componentMeans = np.zeros(0) # mu_j
self.componentPrecisions = np.zeros(0) # s_j
self.iter = 0
def _checkBookkeeping(self):
krep = 0
for j, occCount in enumerate(self.occupationCount):
assert occCount == (self.latentClass == j).sum()
if occCount > 0: krep += 1
assert krep == self.krep
if self.iter:
assert self.occupationCount.sum() == self.N
for j in self.unusedIndices:
assert self.occupationCount[j] == 0
assert self.componentMeans[j] == 0
def step(self):
self.sampleClassAssignments()
self.sampleComponentParameters()
self.sampleHyperparameters()
self.iter += 1
def sampleClassAssignments(self):
for i in xrange(self.N):
obs = self.data[i]
prevClass = self.latentClass[i]
if self.iter:
self.occupationCount[prevClass] -= 1
assert self.occupationCount[prevClass] >= 0
classes = []
weights = []
def addClass(cls, occCount, obs, componentMean, componentPrecision):
classes.append(cls)
weights.append(
occCount/(self.N-1+self.alpha)*
normpdf(obs, componentMean, 1./componentPrecision))
# Components with other observations associated
for j, occCount in enumerate(self.occupationCount):
if occCount == 0: continue
addClass(j, occCount, obs, self.componentMeans[j], self.componentPrecisions[j])
# probability of generating a new class
if self.iter and self.occupationCount[prevClass] == 0:
# "Peculiar situation": use the class parameters
addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentPrecisions[prevClass])
else:
# unrepresented class
# Sample a component mean and precision
newComponentMean = np.random.normal(self.lmb, 1./np.sqrt(self.r))
newComponentPrecision = rasGamma(self.beta, 1./self.w)
addClass(None, self.alpha, obs, newComponentMean, newComponentPrecision)
# Sample from the categorical distribution
weights = np.array(weights)
weights /= weights.sum()
newClass = classes[multinomialDraw(weights)]
if newClass is None:
# Allocate a new class!
if self.unusedIndices:
newClass = self.unusedIndices.pop()
else:
newClass = len(self.occupationCount)
self.occupationCount.resize(newClass+1, refcheck=False)
self.componentMeans.resize(newClass+1, refcheck=False)
self.componentPrecisions.resize(newClass+1, refcheck=False)
self.occupationCount[newClass] = 0 # will increment later
self.componentMeans[newClass] = newComponentMean
self.componentPrecisions[newClass] = newComponentPrecision
self.krep += 1
# Put this observation in that class.
assert newClass not in self.unusedIndices
self.latentClass[i] = newClass
self.occupationCount[newClass] += 1
if self.occupationCount[prevClass] == 0:
self.componentMeans[prevClass] = self.componentPrecisions[prevClass] = 0
self.unusedIndices.add(prevClass)
self.krep -= 1
def sampleComponentParameters(self):
for j, occCount in enumerate(self.occupationCount):
if not occCount: continue
dataInComponent = self.data[self.latentClass==j]
assert len(dataInComponent) == occCount
componentPrecision = self.componentPrecisions[j]
precision = occCount*componentPrecision + self.r
self.componentMeans[j] = np.random.normal(
(dataInComponent.sum()*componentPrecision+self.lmb*self.r)/precision,
1./np.sqrt(precision))
self.componentPrecisions[j] = np.random.gamma(
(self.beta+occCount)/2.,
2./(self.w*self.beta + ((dataInComponent-self.componentMeans[j])**2).sum()))
def sampleHyperparameters(self):
# lambda
lmbMean = (self.dataMean/self.dataVar + self.r*self.componentMeans.sum())/(1./self.dataVar + self.krep*self.r)
lmbVar = 1./(1./self.dataVar + self.krep*self.r)
self.lmb = np.random.normal(lmbMean, np.sqrt(lmbVar))
# r
rShape = (self.krep + self.kappa)/2.
rScale = (self.kappa*self.dataVar+np.sum((self.componentMeans[self.occupationCount != 0]-self.lmb)**2))
self.r = np.random.gamma(rShape, rScale)
# w
wShape = (self.krep*self.beta+1)/2.
wScale = 2./(self.dataVar+self.beta*self.componentPrecisions.sum()) #unoccupied precisions are 0.
self.w = np.random.gamma(wShape, wScale)
# beta
self.beta = gibbsStep_beta(self.beta, self.componentPrecisions, self.w, self.krep)
# alpha
self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)
def gewekeStep(self):
# Fantasize data from the current model state
self.data = np.zeros(self.N)
for i in xrange(self.N):
component = self.latentClass[i]
self.data[i] = np.random.normal(self.componentMeans[component], 1./np.sqrt(self.componentPrecisions[component]))
def gibbsStep_alpha(alpha, k, n):
def logPAlpha(alpha):
if 0 < alpha < 100*n:
return (k-1.5)*np.log(alpha) - 1./(2*alpha) + gammaln(alpha) - gammaln(n+alpha)
return -np.inf
return sliceSample(logPAlpha, alpha, w=1)
def gibbsStep_beta(beta, componentPrecisions, w, krep):
precisions = componentPrecisions[componentPrecisions.nonzero()]
def logPBeta(beta):
if 0 < beta:
b2 = beta/2.
wp = w*precisions
return -krep*gammaln(b2) - 1./(2.*beta) + (krep*beta-3.)/2.*np.log(b2) + (b2*np.log(wp)-b2*wp).sum()
return -np.inf
return sliceSample(logPBeta, beta, w=100)
def crpDraw(alpha, N):
"""Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
occ = []
for i in xrange(N):
u = np.random.rand() * (alpha+i)
if u < alpha:
# new table
occ.append(1)
else:
# u ~ U(alpha, alpha+i)
u -= alpha
assert u<i
assert sum(occ) == i
for table, n in enumerate(occ):
if u < n:
occ[table] += 1
break
u -= n
return occ
def validateAlphaPosterior(nCustomers, nIter):
alphaSamples = np.zeros(nIter)
alpha = 1.0
for i in xrange(nIter):
d = crpDraw(alpha, nCustomers)
alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
alphaSamples[i] = alpha
return alphaSamples
def gewekeComponentParameters(N):
model = DPGMM(np.random.normal(0, 1, 500))
model.latentClass[:250] = 1
model.occupationCount = np.array([250, 250]).astype(np.uint32)
model.lmb = 0.
model.r = 1.
model.componentMeans.resize(2)
model.componentPrecisions.resize(2)
model.componentMeans.fill(model.lmb)
model.componentPrecisions.fill(model.r)
meanSamples = np.zeros(N*2)
precisionSamples = np.zeros(N*2)
for i in xrange(N):
model.gewekeStep()
model.sampleComponentParameters()
meanSamples[2*i:2*i+2] = model.componentMeans
precisionSamples[2*i:2*i+2] = model.componentPrecisions
return model, meanSamples, precisionSamples
def gewekeTest(N=1000):
model = DPGMM(np.random.normal(0, 1, 1))
alphaSamples = np.zeros(N)
for i in xrange(N):
print i
model.step()
model.gewekeStep()
#model._checkBookkeeping()
alphaSamples[i] = model.alpha
return alphaSamples
def testSingleComponent():
data = np.random.normal(0, 1, 500)
model = DPGMM(data)
model._checkBookkeeping()
model.step()
model._checkBookkeeping()
return model
def testTwoComponents():
data = np.r_[np.random.normal(0, 1, 250),
np.random.normal(10, 2, 250)]
model = DPGMM(data)
model._checkBookkeeping()
model.step()
model._checkBookkeeping()
print "Occ:", model.occupationCount
print "Means:", model.componentMeans
print "Precisions:", model.componentPrecisions
return model
def wishartDraw(df, V):
# stupid way
#X = np.random.multivariate_normal(np.zeros(len(V)), V, n)
#return np.dot(X.T, X)
# Bartlett decomposition
n = len(V)
L = cholesky(V) # lower-triangular factor
A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
X = np.dot(L, A)
return np.dot(X, X.T)
# Dirichlet process Gaussian mixture model.
# This one uses Normal-Wishart priors, then everything comes out better.
import numpy as np
from scipy.special import gammaln
import scipy.linalg as LA
from sliceSample import sliceSample
def covForKnownMean(x, mean):
q = x - mean
return q.T.dot(q)
def multinomialDraw(dist):
"""Returns a single draw from the given multinomial distribution."""
return np.random.multinomial(1, dist).argmax()
def mvpdf(obs, mean, invCov, det):
# Forget the (2pi)^(d/2) factor.
q = obs-mean
return det * np.exp(-q.T.dot(invCov).dot(q)/2.)
def wishartDraw(df, V):
# Bartlett decomposition
n = len(V)
L = LA.cholesky(V) # lower-triangular factor
A = np.diag(np.sqrt([np.random.chisquare(df-i) for i in xrange(n)]))
A[np.tril_indices_from(A, -1)] = np.random.standard_normal(n*(n-1)/2)
X = np.dot(L, A)
return np.dot(X, X.T)
def normalWishartDraw(mu, kappa, df, cov):
D = len(mu)
precision = wishartDraw(df, LA.inv(cov)) # DUDE FIXME.
mean = LA.solve_triangular(LA.cholesky(kappa * precision), np.random.standard_normal(D),
lower=True, overwrite_b=True) + mu
return mean, precision
def normalWishartPosteriorDraw(mu0, kappa, df, cov, n, xbar, sampleCovariance):
kappa_n = kappa + n
mu_n = (kappa*mu0 + n*xbar) / kappa_n
Tn = cov + sampleCovariance + kappa*n / kappa_n *covForKnownMean(mu0, xbar)
df_n = df + n
return normalWishartDraw(mu_n, kappa_n, df_n, Tn)
class DPGMM(object):
def __init__(self, data, mu0, kappa, df, cov):
self.data = np.array(data)
self.N, self.D = self.data.shape
self.mu0 = mu0
self.kappa = kappa
self.df = df
self.cov = cov
self.cov_chol = LA.cholesky(self.cov) # lower-triangular factor
# Draw initial hyperparameter values
self.alpha = 1./np.random.gamma(.5, 2)
self.latentClass = np.zeros(self.N, dtype=np.uint16) # indicators c_i
self.occupationCount = np.zeros(0, dtype=np.uint32) # n_j
self.krep = 0 # number of components actually represented
self.unusedIndices = set() # class indices that got removed.
self.componentMeans = np.zeros((0, self.D)) # mu_j
self.componentInvCovs = np.zeros((0, self.D, self.D)) # s_j
self.precisionDets = np.zeros(0)
self.iter = 0
def _checkBookkeeping(self):
krep = 0
for j, occCount in enumerate(self.occupationCount):
assert occCount == (self.latentClass == j).sum()
if occCount > 0: krep += 1
assert krep == self.krep
if self.iter:
assert self.occupationCount.sum() == self.N
for j in self.unusedIndices:
assert self.occupationCount[j] == 0
assert self.componentMeans[j] == 0
def step(self):
self.sampleClassAssignments()
self.sampleComponentParameters()
self.sampleHyperparameters()
self.iter += 1
def sampleClassAssignments(self):
for i in xrange(self.N):
obs = self.data[i]
prevClass = self.latentClass[i]
if self.iter:
self.occupationCount[prevClass] -= 1
assert self.occupationCount[prevClass] >= 0
classes = []
weights = []
def addClass(cls, occCount, obs, componentMean, componentInvCov, det):
classes.append(cls)
weights.append(
occCount/(self.N-1+self.alpha)* # prior
mvpdf(obs, componentMean, componentInvCov, det))
# Components with other observations associated
for j, occCount in enumerate(self.occupationCount):
if occCount == 0: continue
addClass(j, occCount, obs, self.componentMeans[j], self.componentInvCovs[j],
self.precisionDets[j])
# probability of generating a new class
if self.iter and self.occupationCount[prevClass] == 0:
# "Peculiar situation": use the class parameters
addClass(prevClass, self.alpha, obs, self.componentMeans[prevClass], self.componentInvCovs[prevClass], self.precisionDets[j])
else:
# unrepresented class
# Sample a component inv-covariance and mean from the normal-Wishart
newComponentMean, newComponentInvCov = normalWishartDraw(self.mu0, self.kappa, self.df, self.cov)
addClass(None, self.alpha, obs, newComponentMean, newComponentInvCov, LA.det(newComponentInvCov))
# Sample from the categorical distribution
weights = np.array(weights)
weights /= weights.sum()
newClass = classes[multinomialDraw(weights)]
if newClass is None:
# Allocate a new class!
if self.unusedIndices:
newClass = self.unusedIndices.pop()
else:
newClass = len(self.occupationCount)
self.occupationCount.resize(newClass+1, refcheck=False)
self.componentMeans.resize(newClass+1, self.D, refcheck=False)
self.componentInvCovs.resize(newClass+1, self.D, self.D, refcheck=False)
self.precisionDets.resize(newClass+1, refcheck=False)
self.occupationCount[newClass] = 0 # will increment later
self.componentMeans[newClass] = newComponentMean
self.componentInvCovs[newClass] = newComponentInvCov
self.precisionDets[newClass] = LA.det(newComponentInvCov)
self.krep += 1
# Put this observation in that class.
assert newClass not in self.unusedIndices
self.latentClass[i] = newClass
self.occupationCount[newClass] += 1
if self.occupationCount[prevClass] == 0:
self.componentMeans[prevClass] = self.componentInvCovs[prevClass] = self.precisionDets[prevClass] = 0
self.unusedIndices.add(prevClass)
self.krep -= 1
def sampleComponentParameters(self):
for j, occCount in enumerate(self.occupationCount):
if not occCount: continue
dataInComponent = self.data[self.latentClass==j]
assert len(dataInComponent) == occCount
mean = dataInComponent.mean(axis=0)
S = covForKnownMean(dataInComponent, mean) # sample covariance of data in component
self.componentMeans[j], self.componentInvCovs[j] = normalWishartPosteriorDraw(
self.mu0, self.kappa, self.df, self.cov, occCount, mean, S)
self.precisionDets[j] = LA.det(self.componentInvCovs[j])
def sampleHyperparameters(self):
# alpha
self.alpha = gibbsStep_alpha(self.alpha, self.krep, self.N)
def showState(self):
from matplotlib import pyplot as plt
plt.plot(self.data[:,0], self.data[:,1], '+', color='black')
colors = plt.cm.rainbow(np.linspace(0,1,10))
for i, j in enumerate(np.argsort(self.occupationCount)[-10:]):
if self.occupationCount[j] == 0: continue
plotEllipse(self.componentMeans[j], self.componentInvCovs[j], color=colors[i])
data = self.data[self.latentClass == j]
plt.plot(data[:,0], data[:,1], color=colors[i], linestyle='None', marker='+')
def gewekeStep(self):
# Fantasize data from the current model state
self.data = np.zeros(self.N)
chols = {}
for j, occCount in enumerate(self.occupationCount):
if not occCount: continue
chols[j] = LA.cholesky(self.componentInvCovs[j])
for i in xrange(self.N):
component = self.latentClass[i]
self.data[i] = chols[component].dot(np.random.standard_normal(self.D)) + self.componentMeans[component]
def gibbsStep_alpha(alpha, k, n):
def logPAlpha(alpha):
if 0 < alpha < 100*n:
return (k-1.5)*np.log(alpha) - 1./(2*alpha) + gammaln(alpha) - gammaln(n+alpha)
return -np.inf
return sliceSample(logPAlpha, alpha, w=1)
def crpDraw(alpha, N):
"""Draw table occupations from a Chinese Restaurant Process with the given parameter alpha."""
occ = []
for i in xrange(N):
u = np.random.rand() * (alpha+i)
if u < alpha:
# new table
occ.append(1)
else:
# u ~ U(alpha, alpha+i)
u -= alpha
assert u<i
assert sum(occ) == i
for table, n in enumerate(occ):
if u < n:
occ[table] += 1
break
u -= n
return occ
def validateAlphaPosterior(nCustomers, nIter):
alphaSamples = np.zeros(nIter)
alpha = 1.0
for i in xrange(nIter):
d = crpDraw(alpha, nCustomers)
alpha = gibbsStep_alpha(alpha, len(d), nCustomers)
alphaSamples[i] = alpha
return alphaSamples
def gewekeComponentParameters(N):
model = DPGMM(np.random.normal(0, 1, 500))
model.latentClass[:250] = 1
model.occupationCount = np.array([250, 250]).astype(np.uint32)
model.lmb = 0.
model.r = 1.
model.componentMeans.resize(2)
model.componentInvCovs.resize(2)
model.componentMeans.fill(model.lmb)
model.componentInvCovs.fill(model.r)
meanSamples = np.zeros(N*2)
precisionSamples = np.zeros(N*2)
for i in xrange(N):
model.gewekeStep()
model.sampleComponentParameters()
meanSamples[2*i:2*i+2] = model.componentMeans
precisionSamples[2*i:2*i+2] = model.componentInvCovs
return model, meanSamples, precisionSamples
def gewekeTest(N=1000):
model = DPGMM(np.random.normal(0, 1, 1))
alphaSamples = np.zeros(N)
for i in xrange(N):
print i
model.step()
model.gewekeStep()
#model._checkBookkeeping()
alphaSamples[i] = model.alpha
return alphaSamples
def testSingleComponent():
data = np.random.normal(0, 1, 500)
model = DPGMM(data)
model._checkBookkeeping()
model.step()
model._checkBookkeeping()
return model
def testTwoComponents():
data = np.r_[np.random.normal(0, 1, 250),
np.random.normal(10, 2, 250)]
model = DPGMM(data)
model._checkBookkeeping()
model.step()
model._checkBookkeeping()
print "Occ:", model.occupationCount
print "Means:", model.componentMeans
print "Precisions:", model.componentInvCovs
return model
def plotEllipse(mean, prec, **kwargs):
from matplotlib import pyplot as plt
vals, vecs = LA.eig(prec, right=True)
vals = np.sqrt(1./np.real(vals))
theta = np.linspace(0, 2*np.pi, 100)
points = vals[0]*vecs[0]*np.sin(theta)[:,np.newaxis] + vals[1]*vecs[1]*np.cos(theta)[:,np.newaxis]
points += mean
return plt.plot(points[:,0], points[:,1], **kwargs)
import numpy as np
eps = 1e-10
def sliceSample(logpdf, x0, w=1.0, positive=False):
pp = logpdf(x0)
z = pp - np.random.exponential(1.0)
# Sample from the slice -- logpdf(x)<z
# - pick an initial interval of width w, containing x0
L = x0 - np.random.rand()*w
if positive and L<0: L=eps
R = L+w
# - step out (no size limit)
while True:
if z < logpdf(L):
L -= w
if positive and L<0:
L=eps
break
else:
break
while True:
if z < logpdf(R):
R += w
else:
break
#print "Stepped out to L={}, R={}, pdf={}:{}".format(L, R, pdf(L), pdf(R))
# - sample from the part of the slice within (L, R)
while True:
x1 = L + np.random.rand()*(R-L)
pp = logpdf(x1)
#print '[{},{}] Sampled x1={}, pdf(state)={}'.format(L, R, x1, pp)
if z < pp:
break
else:
if x1 < x0:
L = x1
else:
R = x1;
return x1
def testSliceSample():
N = 10000
samples = np.zeros(N)
x = 0
mu = 0.; var = 1.
def logpdf(x):
return -.5*np.log(2*np.pi*var) - (x-mu)**2/(2*var)
for i in xrange(N):
x = sliceSample(logpdf, x, positive=True)
samples[i] = x
return samples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment