// Gist 887906 by @alextp, created March 26, 2011.
// Mean-field variational inference for a clustered linear-regression model in F#,
// using the MathNet.Numerics library.
open MathNet.Numerics
open System

let a = Fn.Factorial 10

// Elementwise helpers over float arrays.
let elementwiseProduct (a: float array) (b: float array) = [| for i in 0..a.Length-1 -> a.[i]*b.[i] |]
let arraySum (a: float array) (b: float array) = [| for i in 0..a.Length-1 -> a.[i]+b.[i] |]
let scalarProduct (a: float) (b: float array) = [| for i in 0..b.Length-1 -> a*b.[i] |]
// Expectation of the vectors in v under the weights z: sum_i z_i * v_i.
let expectation (z: float array) (v: float array array) =
    let prod = [| for i in 0..v.Length-1 -> scalarProduct z.[i] v.[i] |]
    let mutable l = prod.[0]
    for i in 1..prod.Length-1 do
        l <- arraySum l prod.[i]
    l
let dot (a: float array) (b: float array) = Array.sum (elementwiseProduct a b)
let sqnorm a = dot a a
let arraySub a b = arraySum a (scalarProduct -1.0 b)
let ones k = [| for i in 0..k-1 -> 1.0 |]
// Sum of every entry of a jagged array.
let sumall (v: float array array) =
    let mutable s = 0.0
    for i in 0..v.Length-1 do
        s <- s + Array.sum v.[i]
    s
// Log-normalizer of a symmetric Dirichlet(alpha) in k dimensions.
let logiBeta (alpha: float) (k: int) =
    let K = float k
    Fn.GammaLn(alpha*K) - K*Fn.GammaLn(alpha)

// Log-normalizer of a general Dirichlet with parameter vector alpha.
let logiBetav (alpha: float array) = Fn.GammaLn(Array.sum alpha) - Array.sum (Array.map Fn.GammaLn alpha)
// This is the usual mean-field lower bound for a Dirichlet random variable. You get it by computing
// E_q[\log P(\phi)] - E_q[\log Q(\phi)]. The second term is the standard entropy of a Dirichlet
// distribution, and the first term can be computed rather trivially using the known fact that
// the expectation of the log of a Dirichlet-distributed probability is
// digamma(alpha_i) - digamma(sum_alpha).
let clusBoundPhi (alpha: float) (phi: float array) =
    let k = phi.Length
    let K = float k
    let alpha0 = K*alpha
    let phi0 = Array.sum phi
    let vd a = (Fn.Digamma a) - (Fn.Digamma phi0)
    ((logiBeta alpha k)
     + (Array.sum (elementwiseProduct (scalarProduct (alpha-1.0) (ones k)) (Array.map vd phi)))
     - (logiBetav phi) + (phi0-K)*(Fn.Digamma phi0)
     - (Array.sum (elementwiseProduct (arraySub phi (ones k)) (Array.map Fn.Digamma phi))))
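
// A hedged sanity check (not in the original gist): when the variational parameters
// equal the prior, i.e. phi = [alpha; ...; alpha], the bound is -KL(Q || P) = 0,
// so clusBoundPhi should return (numerically) zero.
let phiBoundAtPrior = clusBoundPhi 1.5 (scalarProduct 1.5 (ones 4)) // expect ~0.0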
// This is the mean-field lower bound for a standard Gaussian variable: it is -0.5*||mu||^2 + C.
let clusBoundMu (mu: float array) = -0.5*(sqnorm mu)
// The bound for z is by far the simplest: each assignment contributes the expected
// log of the probability sampled from phi minus the log of the z variable itself,
// weighted by the responsibility, i.e. sum_ik z_ik*(E[log phi_k] - log z_ik).
let clusBoundZ (z: float array array) (phi: float array) =
    let phi0 = Array.sum phi
    let vd a = (Fn.Digamma a) - (Fn.Digamma phi0)
    let expLog = Array.map vd phi
    Array.sumBy (fun (zi: float array) ->
        Array.sum (elementwiseProduct zi (arraySub expLog (Array.map Math.Log zi)))) z
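
// A hedged illustration (not in the original gist): with uniform responsibilities
// z_i = [1/K; ...; 1/K], each point contributes the average of the E[log phi_k]
// terms plus log K, the log K being the entropy of the uniform assignment.
let zBoundUniform = clusBoundZ [| scalarProduct (1.0/3.0) (ones 3) |] [| 1.0; 2.0; 3.0 |]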
// The lower bound for theta in the cluster model.
// You can get it by computing E_q[\log P(\theta)] - E_q[\log Q(\theta)].
// Note that the second term is a constant (as we're not modeling the variance
// of the variational approximation). The first term is a constant plus
// -0.5 D \log sigma - 0.5 || theta - E_z[mu] ||^2 / sigma (which you can read off
// the expression for the multivariate Gaussian density).
let clusBoundTheta (theta: float array) (mu: float array array) (z: float array) (sigma: float) =
    let mmu = expectation z mu
    -0.5*(float theta.Length)*(Math.Log sigma) - 0.5*(sqnorm (arraySub theta mmu))/sigma
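
// A hedged sanity check (not in the original gist): at theta = E_z[mu] the quadratic
// term vanishes and the bound is exactly -0.5*D*log(sigma).
let thetaBoundAtMean =
    let mu = [| [| 1.0; 0.0 |]; [| 0.0; 1.0 |] |]
    let z = [| 0.5; 0.5 |]
    clusBoundTheta (expectation z mu) mu z 2.0 // expect -0.5*2.0*(log 2.0)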
let clusBoundThetas (theta: float array array) mu (z: float array array) sigma =
    let mutable bound = 0.0
    for i in 0..theta.Length-1 do
        bound <- bound + (clusBoundTheta theta.[i] mu z.[i] sigma)
    bound
// Finally, the lower bound for Y. As we're not approximating the distribution of Y, there
// is no need for an entropy term: this is essentially the log-likelihood of Y|X under a
// specified value of theta. Ignoring constants, it's the sum of -0.5*(Y_i - theta^T X_i)^2.
let clusBoundY (theta: float array) (Y: float array) (X: float array array) =
    let mutable ps = 0.
    for i in 0..Y.Length-1 do
        let pred = dot theta X.[i]
        ps <- ps - 0.5*(Y.[i]-pred)*(Y.[i]-pred)
    ps
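
// A hedged illustration (not in the original gist): with perfect predictions every
// squared residual is zero, so the (constant-free) log-likelihood term is 0.
let yBoundPerfect =
    let theta = [| 1.0; 2.0 |]
    let X = [| [| 1.0; 0.0 |]; [| 0.0; 1.0 |] |]
    clusBoundY theta (Array.map (dot theta) X) X // expect 0.0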
let clusBoundYs (theta: float array array) (X: float array array array) (Y: float array array) =
    let mutable bound = 0.0
    for i in 0..Y.Length-1 do
        bound <- bound + (clusBoundY theta.[i] Y.[i] X.[i])
    bound
// The full variational lower bound: the sum of the per-variable terms above.
let clusLowerBound (T: int) (K: int) (D: int) theta mu sigma alpha phi (z: float array array) X Y =
    (clusBoundPhi alpha phi)
    + (Array.sum (Array.map clusBoundMu mu))
    + (clusBoundZ z phi)
    + (clusBoundThetas theta mu z sigma)
    + (clusBoundYs theta X Y)
// This is the standard mean-field update for a Dirichlet-multinomial: add the expected
// counts to the prior. See Blei, Ng, and Jordan's LDA journal paper for a derivation.
let clusUpdatePhi (alpha: float) K (z: float array array) =
    let phi = Array.init z.[0].Length (fun i -> alpha)
    for i in 0..z.Length-1 do
        let zi = z.[i]
        for j in 0..zi.Length-1 do
            phi.[j] <- phi.[j] + zi.[j]
    phi
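
// A hedged illustration (not in the original gist): with hard assignments the update
// reduces to alpha plus the per-cluster counts.
let phiFromHardZ =
    clusUpdatePhi 0.5 2 [| [| 1.0; 0.0 |]; [| 1.0; 0.0 |]; [| 0.0; 1.0 |] |] // expect [| 2.5; 1.5 |]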
// This is the standard mean-field update for the mean of a mixture of Gaussians when the
// variational distribution doesn't model the variances. Essentially it's an expectation over
// the thetas weighted by the corresponding z variables, with a small regularization (the 1.0
// the denominator starts from) pulling things toward the prior.
let clusUpdateMu (theta: float array array) (z: float array array) =
    let T = theta.Length
    let D = theta.[0].Length
    let K = z.[0].Length
    let mu = Array.init K (fun i -> Array.init D (fun j -> 0.))
    for i in 0..K-1 do
        let num = mu.[i]
        let mutable den = 1.0
        for j in 0..T-1 do
            for d in 0..D-1 do
                num.[d] <- num.[d] + z.[j].[i]*theta.[j].[d]
            den <- den + z.[j].[i]
        mu.[i] <- scalarProduct (1.0/den) num
    mu
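
// A hedged illustration (not in the original gist): with all mass on a single cluster,
// mu.[0] is the mean of the thetas, shrunk toward zero by the 1.0 in the denominator.
let muFromOneCluster =
    clusUpdateMu [| [| 2.0 |]; [| 4.0 |] |] [| [| 1.0 |]; [| 1.0 |] |] // expect [| [| 2.0 |] |]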
// This is the standard exp-digamma update for z: each responsibility is proportional to
// exp(E[log phi_j] - 0.5*||theta_i - mu_j||^2/sigma), and the row is then normalized.
let clusUpdateZ (theta: float array array) (mu: float array array) (phi: float array) sigma =
    let T = theta.Length
    let K = mu.Length
    let z = Array.init T (fun i -> Array.init K (fun j -> 0.))
    for i in 0..T-1 do
        let num = z.[i]
        let mutable sum = 0.0
        for j in 0..K-1 do
            let term = Math.Exp ((Fn.Digamma phi.[j]) - 0.5*(sqnorm (arraySub theta.[i] mu.[j]))/sigma)
            num.[j] <- term
            sum <- sum + term
        for j in 0..K-1 do
            num.[j] <- num.[j] / sum
    z
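
// A hedged sanity check (not in the original gist): each row of z is a normalized
// distribution over the clusters, and closer cluster means get more of the mass.
let zExample =
    clusUpdateZ [| [| 0.0 |] |] [| [| 0.0 |]; [| 3.0 |] |] [| 1.0; 1.0 |] 1.0
    // expect one row summing to 1.0 with z.[0].[0] > z.[0].[1]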
// This might not look trivial, but it's just computing the ridge regression of theta with a
// prior centered at the expected mu according to z: solve
// (I/sigma + sum_j x_j x_j^T) theta = mmu/sigma + sum_j y_j x_j.
let clusUpdateTheta (mu: float array array) (z: float array array) (x: float array array array) (y: float array array) (sigma: float) =
    let T = z.Length
    let D = mu.[0].Length
    let theta = Array.init T (fun i -> Array.init D (fun j -> 0.))
    for i in 0..T-1 do
        let mmu = expectation z.[i] mu
        let mutable num = scalarProduct (1.0/sigma) mmu
        let den = Array2D.init D D (fun i j -> if i = j then 1.0/sigma else 0.0)
        let X = x.[i]
        let Y = y.[i]
        for j in 0..X.Length-1 do
            num <- arraySum num (scalarProduct Y.[j] X.[j])
            for k in 0..D-1 do
                for l in 0..D-1 do
                    den.[k,l] <- den.[k,l] + X.[j].[k] * X.[j].[l]
        let a = LinearAlgebra.Matrix.Create(den)
        let b = LinearAlgebra.Matrix.Create([|num|])
        let c = a.SolveTranspose b
        theta.[i] <- c.GetArray().[0]
    theta
// Random initialization using MathNet.Numerics' Mersenne Twister random source.
let r = RandomSources.MersenneTwisterRandomSource()
let randomVector D = [| for i in 0..D-1 -> r.NextDouble() |]
let initialize (x: float array array array) (y: float array array) (alpha: float) (sigma: float) (K: int) =
    let T = x.Length
    let D = x.[0].[0].Length
    let theta = Array.init T (fun i -> randomVector D)
    let mu = Array.init K (fun i -> randomVector D)
    let z = Array.init T (fun i -> ones K)
    let phi = Array.init K (fun i -> alpha)
    (phi, mu, z, theta)
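
// A minimal sketch (not in the original gist) of how these updates might be wired into
// a coordinate-ascent loop: cycle through the four updates and monitor the lower bound,
// which should be non-decreasing. The name fit, the update order, and the fixed
// iteration count are assumptions, not something the gist specifies.
let fit (x: float array array array) (y: float array array) (alpha: float) (sigma: float) (K: int) (iters: int) =
    let T = x.Length
    let D = x.[0].[0].Length
    let (phi0, mu0, z0, theta0) = initialize x y alpha sigma K
    let mutable phi = phi0
    let mutable mu = mu0
    let mutable z = z0
    let mutable theta = theta0
    for _ in 1..iters do
        z <- clusUpdateZ theta mu phi sigma
        phi <- clusUpdatePhi alpha K z
        mu <- clusUpdateMu theta z
        theta <- clusUpdateTheta mu z x y sigma
        printfn "bound: %f" (clusLowerBound T K D theta mu sigma alpha phi z x y)
    (phi, mu, z, theta)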