Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Mixture of Gaussians
#### Author: Wilson Mongwe
#### Date: 21/06/2015
#### Website: www.wilsonmongwe.co.za
#### Title: This code is for implementing the EM algorithm to estimate paramters from a mixture
#### of three normal distributions.
########################### Simulating the population data #############################
N= 10000 ### Number of draws from the mixture
normals= 3 ### number of normals in the mixture. We have chosen to use 3 in this example.
components = sample(1:normals,prob=c(0.2,0.6,0.2),size=N,replace=TRUE) ### Decideing which normal to draw from
mus= c(-3,9.7,4) ### The means of the normals
sds =c(1.5,1.2,1.1) #### standard deviations of each of the normals
x = rnorm(n=N,mean=mus[components],sd=sds[components]) ### creating the mixture
plot(density(x),xlim=c(-15,28),ylim=c(0,0.3),
xlab="Values",ylab="Density",main="Density of mixture of Normals",lwd=4) ### plotting the density of the mixture
par(new=TRUE)
########################### Setting up the EM algorithm #############################
#### Distribution of unboserved data given obseerved data #####
q_one=function(w)
{
lam_one = (w[1])
lam_two = (w[2])
mu_one=w[3]
sig_one=(w[4])
mu_two=w[5]
sig_two=(w[6])
mu_three=w[7]
sig_three=(w[8])
(dnorm(x,mu_one,sig_one,log=FALSE)*lam_one)/
(dnorm(x,mu_one,sig_one,log=FALSE)*lam_one+dnorm(x,mu_two,sig_two,log=FALSE)*lam_two+
dnorm(x,mu_three,sig_three,log=FALSE)*(1-lam_one-lam_two))
}
q_two=function(w)
{
lam_one = (w[1])
lam_two = (w[2])
mu_one=w[3]
sig_one=(w[4])
mu_two=w[5]
sig_two=(w[6])
mu_three=w[7]
sig_three=(w[8])
(dnorm(x,mu_two,sig_two,log=FALSE)*lam_two)/
(dnorm(x,mu_one,sig_one,log=FALSE)*lam_one+dnorm(x,mu_two,sig_two,log=FALSE)*lam_two+
dnorm(x,mu_three,sig_three,log=FALSE)*(1-lam_one-lam_two))
}
q_three=function(w)
{
lam_one = (w[1])
lam_two = (w[2])
mu_one=w[3]
sig_one=(w[4])
mu_two=w[5]
sig_two=(w[6])
mu_three=w[7]
sig_three=(w[8])
dnorm(x,mu_three,sig_three,log=FALSE)*(1-lam_one-lam_two)/
(dnorm(x,mu_one,sig_one,log=FALSE)*lam_one+dnorm(x,mu_two,sig_two,log=FALSE)*lam_two+
dnorm(x,mu_three,sig_three,log=FALSE)*(1-lam_one-lam_two))
}
#### Likelihood function #####
likelihood=function(w)
{
lam_one = (w[1])
lam_two = (w[2])
mu_one=w[3]
sig_one=(w[4])
mu_two=w[5]
sig_two=(w[6])
mu_three=w[7]
sig_three=(w[8])
result = log(dnorm(x,mu_one,sig_one,log=FALSE)*lam_one/q_one(w))*q_one(w)+
log(dnorm(x,mu_two,sig_two,log=FALSE)*lam_two/q_two(w))*q_two(w)+
log(dnorm(x,mu_three,sig_three,log=FALSE)*(1-lam_one-lam_two)/q_three(w))*(q_three(w))
-sum(result)
}
##### Implementing the EM algorithm #####
plot(density(x),xlim=c(-15,28),ylim=c(0,0.3),
xlab="Values",ylab="Density",main="Density of mixture of Normals",lwd=4) ### plotting the density of the mixture
par(new=TRUE)
start=c(runif(1,0.1,0.4),runif(1,0.1,0.4),runif(1,-1,1),
runif(1,0.2,1.4),runif(1,8,11),runif(1,0.2,1.4),
runif(1,2,7),runif(1,0,1.4)) #### Initial value of the parameters. EM algorithm is very sensitive to this
components = sample(1:normals,prob=c((start[1]),(start[2]),1-(start[1])-(start[2])),size=N,replace=TRUE)
mus= c((start[3]),(start[5]),(start[7]))
sds =c((start[4]),(start[6]),(start[8]))
samples = rnorm(n=N,mean=mus[components],sd=sds[components])
plot(density(samples),xlim=c(-15,28),ylim=c(0,0.3),xlab="",ylab="",main="",col="red",lwd=2)
par(new=TRUE)
tolerance = 20
i=1
functionValueCurrent=0
functionValueNew=100000
while(tolerance>10^-10)
{
if(i==1)
{
cat("Iteration number:\t", i,"Estimates:\t", c((start[1]),(start[2]),(1-(start[1])-(start[2])),
(start[3]),(start[4]),
(start[5])),(start[6]),
(start[7]),(start[8]), "\n")
}
functionValueCurrent=functionValueNew
p=optim(start,likelihood) ### Note that parameters have not been constrained. They should be constrained!
functionValueNew= p$value
tolerance= abs(functionValueNew-functionValueCurrent)
i=i+1
start=p$par
#cat("Current:\t", functionValueCurrent,"\n")
#cat("New:\t", functionValueNew,"\n")
#cat("Tolerance:\t", tolerance,"\n")
cat("Iteration number:\t", i,"Estimates:\t", c((start[1]),(start[2]),(1-(start[1])-(start[2])),
(start[3]),(start[4]),
(start[5])),(start[6]),
(start[7]),(start[8]), "\n")
components = sample(1:normals,prob=c((start[1]),(start[2]),1-(start[1])-(start[2])),size=N,replace=TRUE)
mus= c((start[3]),(start[5]),(start[7]))
sds =c((start[4]),(start[6]),(start[8]))
samples = rnorm(n=N,mean=mus[components],sd=sds[components])
plot(density(samples),xlim=c(-15,28),ylim=c(0,0.3),xlab="",ylab="",main="",col="red",lwd=2)
par(new=TRUE)
}
plot(density(x),xlim=c(-15,28),ylim=c(0,0.3),
xlab="Values",ylab="Density",main="Density of mixture of Normals",lwd=1) ### plotting the density of the mixture
################# GIF generation for website ###########################
ani.options(convert = 'C:\\Program Files\\ImageMagick-6.9.1-Q16\\convert.exe')
library(animation)
ani.options(interval=0.5)
start=c(runif(1,0.1,0.4),runif(1,0.1,0.4),runif(1,-1,1),
runif(1,0.2,1.4),runif(1,8,11),runif(1,0.2,1.4),
runif(1,2,7),runif(1,0,1.4))
saveGIF({
components = sample(1:normals,prob=c((start[1]),(start[2]),1-(start[1])-(start[2])),size=N,replace=TRUE)
mus= c((start[3]),(start[5]),(start[7]))
sds =c((start[4]),(start[6]),(start[8]))
plot(density(x),xlim=c(-15,28),ylim=c(0,0.3),
xlab="0",ylab="",main="Density of mixture of Normals",lwd=3) ### plotting the density of the mixture
par(new=TRUE)
samples = rnorm(n=N,mean=mus[components],sd=sds[components])
plot(density(samples),xlim=c(-15,28),ylim=c(0,0.3),xlab="0",ylab="",main="",col="red",lwd=2)
legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)
for( k in 1:10)
{
plot(density(x),xlim=c(-15,28),ylim=c(0,0.3),
xlab=k,ylab="",main="Density of mixture of Normals",lwd=4) ### plotting the density of the mixture
par(new=TRUE)
p=optim(start,likelihood) ### Note that parameters have not been constrained. They should be constrained!
start=p$par
#cat("Current:\t", functionValueCurrent,"\n")
#cat("New:\t", functionValueNew,"\n")
#cat("Tolerance:\t", tolerance,"\n")
cat("Iteration number:\t", k,"Estimates:\t", c((start[1]),(start[2]),(1-(start[1])-(start[2])),
(start[3]),(start[4]),
(start[5])),(start[6]),
(start[7]),(start[8]), "\n")
components = sample(1:normals,prob=c((start[1]),(start[2]),1-(start[1])-(start[2])),size=N,replace=TRUE)
mus= c((start[3]),(start[5]),(start[7]))
sds =c((start[4]),(start[6]),(start[8]))
samples = rnorm(n=N,mean=mus[components],sd=sds[components])
plot(density(samples),xlim=c(-15,28),ylim=c(0,0.3),xlab=k,ylab="",main="",col="red",lwd=2)
legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)
}
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.