Created
March 26, 2011 00:53
-
-
Save alextp/887906 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open MathNet.Numerics; | |
open System | |
// Smoke-test binding: 10! via MathNet's special-functions module (value is never used below).
let a = Fn.Factorial 10
/// Pointwise product of two vectors: result.[i] = a.[i] * b.[i].
/// Length is taken from `a`; `b` must be at least as long.
let elementwiseProduct (a: float array) (b: float array) =
    Array.init a.Length (fun i -> a.[i] * b.[i])
/// Pointwise sum of two vectors: result.[i] = a.[i] + b.[i].
/// Length is taken from `a`; `b` must be at least as long.
let arraySum (a: float array) (b: float array) =
    Array.init a.Length (fun i -> a.[i] + b.[i])
/// Scale every entry of `b` by the scalar `a`.
let scalarProduct (a: float) (b: float array) =
    b |> Array.map (fun x -> a * x)
/// Weighted sum of the rows of `v` with weights `z`: sum_i z.[i] * v.[i].
/// Requires at least one row in `v`, and z.Length >= v.Length.
let expectation (z: float array) (v: float array array) =
    v
    |> Array.mapi (fun i row -> scalarProduct z.[i] row)
    |> Array.reduce arraySum
/// Inner product of two equal-length vectors.
let dot (a: float array) (b: float array) =
    elementwiseProduct a b |> Array.sum
/// Squared Euclidean norm: ||v||^2 = v . v.
let sqnorm v = dot v v
/// Elementwise difference a - b, implemented as a + (-1) * b.
/// Fix: in F#, `scalarProduct -1.0 b` lexes as the infix subtraction
/// `scalarProduct - (1.0 b)` (a well-known F# gotcha) and does not typecheck;
/// the negative literal must be parenthesized.
let arraySub a b = arraySum a (scalarProduct (-1.0) b)
/// A length-k vector of all ones.
let ones k = Array.create k 1.0
/// Sum of every entry of a jagged 2-D array.
let sumall (v: float array array) =
    v |> Array.sumBy Array.sum
/// Log of the inverse normalizing constant of a symmetric Dirichlet(alpha, ..., alpha)
/// over k components: GammaLn(k*alpha) - k*GammaLn(alpha).
let logiBeta (alpha: float) (k: int) =
    let kf = float k
    Fn.GammaLn (alpha * kf) - kf * Fn.GammaLn alpha
/// Log of the inverse normalizing constant of a general Dirichlet(alpha):
/// GammaLn(sum alpha) - sum_i GammaLn(alpha_i).
let logiBetav (alpha: float array) =
    Fn.GammaLn (Array.sum alpha) - (alpha |> Array.sumBy Fn.GammaLn)
// This is the usual mean-field lower-bound for a dirichlet random variable. You get it by computing
// E_q[\log P(\phi)] - E_q[\log Q(\phi)]. The second term is a standard entropy of a dirichlet
// distribution, and the first term can be computed rather trivially using the known fact that
// the expectation of the log of a dirichlet-distributed probability is
// (digamma(alpha_i) - digamma(sum_alpha))
// alpha: symmetric Dirichlet prior parameter; phi: the K variational Dirichlet parameters.
// Fix: dropped the unused local `alpha0` (computed but never referenced).
let clusBoundPhi (alpha: float) (phi: float array) =
    let k = phi.Length
    let K = float k
    let phi0 = Array.sum phi
    // E_q[log phi_i] under Dirichlet(phi): digamma(phi_i) - digamma(sum phi)
    let vd a = (Fn.Digamma a) - (Fn.Digamma phi0)
    // prior term: log-normalizer of the symmetric prior + (alpha-1) * sum_i E[log phi_i],
    // then minus the (negative) entropy terms of Dirichlet(phi)
    ((logiBeta alpha k) + (Array.sum (elementwiseProduct (scalarProduct (alpha-1.0) (ones k))
                                                         (Array.map vd phi)))
     - (logiBetav phi) + (phi0-K)*(Fn.Digamma phi0)
     - (Array.sum (elementwiseProduct (arraySub phi (ones k)) (Array.map Fn.Digamma phi))))
// Mean-field lower bound for a standard-gaussian mean variable: -0.5 * ||mu||^2 (+ const).
let clusBoundMu (mu: float array) = -(0.5 * sqnorm mu)
// The bound for z is by far the simplest. It's the sum of the expected logarithms of the
// probabilities sampled from phi minus the sum of the logarithms of the z variables.
// NOTE(review): the first term sums E[log phi_j] once per component rather than weighting it
// by the z responsibilities, and the second term is sum log z rather than the entropy
// sum z log z — confirm both against the model derivation before relying on this bound.
let clusBoundZ (z: float array array) (phi: float array) =
    let phi0 = Array.sum(phi)
    // E_q[log phi_j] under Dirichlet(phi)
    let vd a = (Fn.Digamma a) - (Fn.Digamma phi0)
    (Array.sum (Array.map vd phi)) - (sumall (Array.map (fun i -> Array.map Math.Log i) z))
// Lower bound for one theta in the cluster model: E_q[log P(theta)] - E_q[log Q(theta)].
// The entropy term is constant (the variational variance is not modeled), so up to
// constants this is -0.5*D*log(sigma) - 0.5*||theta - E_z[mu]||^2 / sigma, i.e. the
// log-density of a spherical gaussian centered on the z-weighted expectation of mu.
let clusBoundTheta (theta: float array) (mu: float array array) (z: float array) (sigma: float) =
    let expectedMu = expectation z mu
    let d = float theta.Length
    -0.5 * d * (Math.Log sigma) - 0.5 * (sqnorm (arraySub theta expectedMu)) / sigma
// Sum of clusBoundTheta over every task's theta, pairing theta.[i] with z.[i].
let clusBoundThetas (theta: float array array) mu (z: float array array) sigma =
    theta
    |> Array.mapi (fun i t -> clusBoundTheta t mu z.[i] sigma)
    |> Array.sum
// Lower bound for Y: no entropy term is needed since Y's distribution is not being
// approximated. This is the log-likelihood of Y|X under theta, up to constants:
// sum_i -0.5 * (Y_i - theta^T X_i)^2.
let clusBoundY (theta: float array) (Y: float array) (X: float array array) =
    let mutable acc = 0.
    for i in 0 .. Y.Length - 1 do
        let residual = Y.[i] - dot theta X.[i]
        acc <- acc - 0.5 * residual * residual
    acc
// Sum of clusBoundY over every task, pairing theta.[i], Y.[i], X.[i].
let clusBoundYs (theta: float array array) (X: float array array array) (Y: float array array) =
    Y
    |> Array.mapi (fun i yi -> clusBoundY theta.[i] yi X.[i])
    |> Array.sum
// Full variational lower bound: the sum of the per-variable bounds.
// T, K, and D are accepted for interface compatibility but are not read here.
let clusLowerBound (T: int) (K: int) (D: int) theta mu sigma alpha phi (z: float array array) X Y =
    let phiTerm = clusBoundPhi alpha phi
    let muTerm = Array.sum (Array.map clusBoundMu mu)
    let zTerm = clusBoundZ z phi
    let thetaTerm = clusBoundThetas theta mu z sigma
    let yTerm = clusBoundYs theta X Y
    phiTerm + muTerm + zTerm + thetaTerm + yTerm
// This is the standard mean-field update for a dirichlet-multinomial. See Blei, Ng, and Jordan's
// LDA journal paper for a derivation: each component starts at the prior pseudo-count alpha and
// accumulates the expected assignment mass from every row of z.
// (K is accepted for interface compatibility; dimensionality is taken from z itself.)
let clusUpdatePhi (alpha: float) K (z: float array array) =
    let counts = Array.create z.[0].Length alpha
    for zi in z do
        zi |> Array.iteri (fun j w -> counts.[j] <- counts.[j] + w)
    counts
// This is the standard mean-field update for the mean of a mixture of gaussians when the
// variational distribution doesn't model the variances: a weighted expectation over the thetas
// weighted by the corresponding z variables, with a small regularization (den starts at 1.0,
// one pseudo-observation at the prior/origin) to pull things toward the prior.
// Fix: the final rescale used `11.0/den`, which inflates every mean elevenfold; the
// weighted average requires `1.0/den`.
let clusUpdateMu (theta: float array array) (z: float array array) =
    let T = theta.Length
    let D = theta.[0].Length
    let K = z.[0].Length
    let mu = Array.init K (fun _ -> Array.zeroCreate D)
    for i in 0..K-1 do
        // num aliases mu.[i] (all zeros) and accumulates the weighted sum in place
        let num = mu.[i]
        let mutable den = 1.0
        for j in 0..T-1 do
            for d in 0..D-1 do
                num.[d] <- num.[d] + z.[j].[i]*theta.[j].[d]
            den <- den + z.[j].[i]
        let w = 1.0 / den
        mu.[i] <- num |> Array.map (fun s -> w * s)
    mu
// This is the standard exp-digamma update for z:
// z_ij proportional to exp(digamma(phi_j) - 0.5*||theta_i - mu_j||^2 / sigma), normalized over j.
// Fix: the squared-distance term must enter with a NEGATIVE sign (it comes from a gaussian
// log-likelihood); the original added it, making distant clusters MORE likely.
let clusUpdateZ (theta: float array array) (mu: float array array) (phi: float array) sigma =
    let T = theta.Length
    let K = mu.Length
    let z = Array.init T (fun _ -> Array.zeroCreate K)
    for i in 0..T-1 do
        let num = z.[i]
        let mutable sum = 0.0
        for j in 0..K-1 do
            let term = Math.Exp ((Fn.Digamma phi.[j]) - 0.5*(sqnorm (arraySub theta.[i] mu.[j]))/sigma)
            num.[j] <- term
            sum <- sum + term
        // normalize row i into a probability vector
        for j in 0..K-1 do
            num.[j] <- num.[j] / sum
        z.[i] <- num
    z
// This might not look trivial, but it's just computing the ridge regression of theta with a
// prior equal to the expected mu according to z: solve (reg + X^T X) theta_i = mmu/sigma + X^T y.
// Fix: the X^T X accumulation used `*` instead of `+`, so off-diagonal entries stayed zero
// forever and diagonal entries were products of terms rather than sums.
// NOTE(review): num is scaled by 1/sigma while den's diagonal is seeded with sigma; standard
// ridge would seed it with 1/sigma as well — confirm against the derivation.
let clusUpdateTheta (mu: float array array) (z: float array array) (x: float array array array) (y: float array array) (sigma: float) =
    let T = z.Length
    let D = mu.[0].Length
    let theta = Array.init T (fun _ -> Array.zeroCreate D)
    for i in 0..T-1 do
        // prior mean for task i: the z-weighted expectation of the cluster means
        let mmu = expectation z.[i] mu
        let mutable num = scalarProduct (1.0/sigma) mmu
        let den = Array2D.init D D (fun r c -> if r = c then sigma else 0.0)
        let X = x.[i]
        let Y = y.[i]
        for j in 0..X.Length-1 do
            num <- arraySum num (scalarProduct Y.[j] X.[j])
            for k in 0..D-1 do
                for l in 0..D-1 do
                    den.[k,l] <- den.[k,l] + X.[j].[k] * X.[j].[l]
        // solve the (transposed) linear system for this task's theta
        let a = LinearAlgebra.Matrix.Create(den)
        let b = LinearAlgebra.Matrix.Create([|num|])
        let c = a.SolveTranspose b
        theta.[i] <- c.GetArray().[0]
    theta
// Shared pseudo-random source for all initialization draws.
let r = RandomSources.MersenneTwisterRandomSource()
// A vector of D uniform draws from r.
// Fix: `0..D` produced D+1 elements — off by one.
let randomVector D = [| for i in 0..D-1 -> float (r.NextDouble()) |]
// Random initialization of all variational parameters:
// phi (K Dirichlet params at the prior alpha), mu (K random cluster means),
// z (uniform-ones responsibilities per task), theta (T random regression vectors).
// Fix: D (the feature dimensionality) was read as x.[0].Length, which is the number of
// observations for task 0; the feature count is x.[0].[0].Length.
let initialize (x: float array array array) (y: float array array) (alpha: float) (sigma: float) (K: int) =
    let T = x.Length
    let D = x.[0].[0].Length
    let theta = Array.init T (fun _ -> randomVector D)
    let mu = Array.init K (fun _ -> randomVector D)
    let z = Array.init T (fun _ -> ones K)
    let phi = Array.init K (fun _ -> alpha)
    (phi, mu, z, theta)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment