WilsonMongwe/CoinTossing.R

## CoinTossing.R
#### Author: Wilson Mongwe
#### Date: 21/06/2015
#### Website: www.wilsonmongwe.co.za
#### Title: Implementing the EM algorithm to the coin tossing example

####################### Simulating the population data #########################
N= 10000
numberOfCOins=2
components = sample(1:numberOfCOins,prob=c(0.45,0.55),size=N,replace=TRUE)
thetas= c(0.7,0.2)  ### probablity of obtaining  a head on Coin 2 and Coin 3 respectively
trials =10  ### number of times that each of Coin 2 and Coin 3 gets tossed.

x = rbinom(n=N,size=trials,prob=thetas[components]) ### sampling from a binomial distribution
plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
     ylab="Density",main="Approximate 'density' of binomial mixture",lwd=4)
par(new=TRUE)


########################### Setting up the EM algorithm #############################

#### Distribution of unboserved data given obseerved data #####

q_one=function(w)
  {
    lam = (w[1])
    p_one=(w[2])
    p_two=(w[3])

    (dbinom(x,trials,p_one,log=FALSE)*lam)/
      (dbinom(x,trials,p_one,log=FALSE)*lam+dbinom(x,trials,p_two,log=FALSE)*(1-lam))
  }

q_two=function(w)
  {
   lam = (w[1])
   p_one=(w[2])
   p_two=(w[3])

   (dbinom(x,trials,p_two,log=FALSE)*(1-lam))/
     (dbinom(x,trials,p_one,log=FALSE)*lam+dbinom(x,trials,p_two,log=FALSE)*(1-lam))
  }


#### Setting up the Likelihood ####

  likelihood=function(w)
  {
    lam = (w[1])
    p_one=(w[2])
    p_two=(w[3])

    result = log(dbinom(x,trials,p_one,log=FALSE)*lam/q_one(w))*q_one(w)+
      log(dbinom(x,trials,p_two,log=FALSE)*(1-lam)/q_two(w))*q_two(w)

    -sum(result)
  }


##### Implementing the EM algorithm #####

plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
     ylab="Density",main="Approximate 'density' of binomial mixture",lwd=4)
par(new=TRUE)

start = c(runif(1,0.4,0.9),runif(1,0.4,0.9),runif(1,0.4,0.9))
tolerance = 20
i=1
functionValueCurrent=0
functionValueNew=100000

while(tolerance>10^-10)
{

 if(i==1)
 {
   cat("Iteration number:\t", i,"Estimates:\t",c((start[1]),
                              (start[2]),
                              (start[3])), "\n")

 }

functionValueCurrent=functionValueNew
p=optim(start,likelihood,method = "L-BFGS-B", lower=c(0.01,0.01,0.01),upper=c(0.99,0.99,0.99))
functionValueNew= p$value
tolerance= abs(functionValueNew-functionValueCurrent)
i=i+1
start=p$par

#cat("Current:\t", functionValueCurrent,"\n")
#cat("New:\t", functionValueNew,"\n")
#cat("Tolerance:\t", tolerance,"\n")
cat("Iteration number:\t", i,"Estimates:\t", c((start[1]),
                                                 (start[2]),
                                                 (start[3])), "\n")

components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
thetas= c((start[2]),(start[3]))
samples = rbinom(n=N,size=trials,prob=thetas[components])

plot(density(samples),xlim=c(-2,12),ylim=c(0,0.26),xlab="",ylab="",main="",col="red",lwd=2)
par(new=TRUE)

}


plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
     ylab="Density",main="Approximate 'density' of binomial mixture",lwd=2)


################# GIF generation for website ###########################


ani.options(convert = 'C:\\Program Files\\ImageMagick-6.9.1-Q16\\convert.exe')
library(animation)
ani.options(interval=0.5)
start=c(runif(1,0.1,0.9),runif(1,0.1,0.9),runif(1,0.1,0.9))
saveGIF({

  components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
  thetas= c((start[2]),(start[3]))
  samples = rbinom(n=N,size=trials,prob=thetas[components])


  plot(density(x),xlim=c(-2,12),ylim=c(0,0.4),xlab="0",
       ylab="Density",main="Approximate 'density' of binomial mixture",col="black",lwd=4) ### plotting the density of the mixture
  par(new=TRUE)

  plot(density(samples),xlim=c(-2,12),ylim=c(0,0.4),xlab="",
       ylab="",main="Approximate 'density' of binomial mixture",col="red",lwd=2)
  legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)

  for( k in 1:10)
  {
    plot(density(x),xlim=c(-2,12),ylim=c(0,0.4),xlab=k,
         ylab="Density",main="Approximate 'density' of binomial mixture",col="black",lwd=4) ### plotting the density of the mixture
    par(new=TRUE)

    p=optim(start,likelihood,method = "L-BFGS-B", lower=c(0.01,0.01,0.01),upper=c(0.99,0.99,0.99))

    start=p$par

    cat("Iteration number:\t", k,"Estimates:\t", c((start[1]),
                                                   (start[2]),
                                                   (start[3])), "\n")

    components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
    thetas= c((start[2]),(start[3]))
    samples = rbinom(n=N,size=trials,prob=thetas[components])

    samples = rbinom(n=N,size=trials,prob=thetas[components])
    plot(density(samples),xlim=c(-2,12),ylim=c(0,0.4),xlab=k,
         ylab="",main="Approximate 'density' of binomial mixture",col="red",lwd=2)
    legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)


  }

})
	#### Author: Wilson Mongwe
	#### Date: 21/06/2015
	#### Website: www.wilsonmongwe.co.za
	#### Title: Implementing the EM algorithm to the coin tossing example

	####################### Simulating the population data #########################
	N= 10000
	numberOfCOins=2
	components = sample(1:numberOfCOins,prob=c(0.45,0.55),size=N,replace=TRUE)
	thetas= c(0.7,0.2) ### probablity of obtaining a head on Coin 2 and Coin 3 respectively
	trials =10 ### number of times that each of Coin 2 and Coin 3 gets tossed.

	x = rbinom(n=N,size=trials,prob=thetas[components]) ### sampling from a binomial distribution
	plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
	ylab="Density",main="Approximate 'density' of binomial mixture",lwd=4)
	par(new=TRUE)


	########################### Setting up the EM algorithm #############################

	#### Distribution of unboserved data given obseerved data #####

	q_one=function(w)
	{
	lam = (w[1])
	p_one=(w[2])
	p_two=(w[3])

	(dbinom(x,trials,p_one,log=FALSE)*lam)/
	(dbinom(x,trials,p_one,log=FALSE)lam+dbinom(x,trials,p_two,log=FALSE)(1-lam))
	}

	q_two=function(w)
	{
	lam = (w[1])
	p_one=(w[2])
	p_two=(w[3])

	(dbinom(x,trials,p_two,log=FALSE)*(1-lam))/
	(dbinom(x,trials,p_one,log=FALSE)lam+dbinom(x,trials,p_two,log=FALSE)(1-lam))
	}


	#### Setting up the Likelihood ####

	likelihood=function(w)
	{
	lam = (w[1])
	p_one=(w[2])
	p_two=(w[3])

	result = log(dbinom(x,trials,p_one,log=FALSE)lam/q_one(w))q_one(w)+
	log(dbinom(x,trials,p_two,log=FALSE)(1-lam)/q_two(w))q_two(w)

	-sum(result)
	}


	##### Implementing the EM algorithm #####

	plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
	ylab="Density",main="Approximate 'density' of binomial mixture",lwd=4)
	par(new=TRUE)

	start = c(runif(1,0.4,0.9),runif(1,0.4,0.9),runif(1,0.4,0.9))
	tolerance = 20
	i=1
	functionValueCurrent=0
	functionValueNew=100000

	while(tolerance>10^-10)
	{

	if(i==1)
	{
	cat("Iteration number:\t", i,"Estimates:\t",c((start[1]),
	(start[2]),
	(start[3])), "\n")

	}

	functionValueCurrent=functionValueNew
	p=optim(start,likelihood,method = "L-BFGS-B", lower=c(0.01,0.01,0.01),upper=c(0.99,0.99,0.99))
	functionValueNew= p$value
	tolerance= abs(functionValueNew-functionValueCurrent)
	i=i+1
	start=p$par

	#cat("Current:\t", functionValueCurrent,"\n")
	#cat("New:\t", functionValueNew,"\n")
	#cat("Tolerance:\t", tolerance,"\n")
	cat("Iteration number:\t", i,"Estimates:\t", c((start[1]),
	(start[2]),
	(start[3])), "\n")

	components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
	thetas= c((start[2]),(start[3]))
	samples = rbinom(n=N,size=trials,prob=thetas[components])

	plot(density(samples),xlim=c(-2,12),ylim=c(0,0.26),xlab="",ylab="",main="",col="red",lwd=2)
	par(new=TRUE)

	}


	plot(density(x),xlim=c(-2,12),ylim=c(0,0.26),xlab="Values",
	ylab="Density",main="Approximate 'density' of binomial mixture",lwd=2)



	################# GIF generation for website ###########################


	ani.options(convert = 'C:\\Program Files\\ImageMagick-6.9.1-Q16\\convert.exe')
	library(animation)
	ani.options(interval=0.5)
	start=c(runif(1,0.1,0.9),runif(1,0.1,0.9),runif(1,0.1,0.9))
	saveGIF({

	components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
	thetas= c((start[2]),(start[3]))
	samples = rbinom(n=N,size=trials,prob=thetas[components])


	plot(density(x),xlim=c(-2,12),ylim=c(0,0.4),xlab="0",
	ylab="Density",main="Approximate 'density' of binomial mixture",col="black",lwd=4) ### plotting the density of the mixture
	par(new=TRUE)

	plot(density(samples),xlim=c(-2,12),ylim=c(0,0.4),xlab="",
	ylab="",main="Approximate 'density' of binomial mixture",col="red",lwd=2)
	legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)

	for( k in 1:10)
	{
	plot(density(x),xlim=c(-2,12),ylim=c(0,0.4),xlab=k,
	ylab="Density",main="Approximate 'density' of binomial mixture",col="black",lwd=4) ### plotting the density of the mixture
	par(new=TRUE)

	p=optim(start,likelihood,method = "L-BFGS-B", lower=c(0.01,0.01,0.01),upper=c(0.99,0.99,0.99))

	start=p$par

	cat("Iteration number:\t", k,"Estimates:\t", c((start[1]),
	(start[2]),
	(start[3])), "\n")

	components = sample(1:numberOfCOins,prob=c((start[1]),1-(start[1])),size=N,replace=TRUE)
	thetas= c((start[2]),(start[3]))
	samples = rbinom(n=N,size=trials,prob=thetas[components])

	samples = rbinom(n=N,size=trials,prob=thetas[components])
	plot(density(samples),xlim=c(-2,12),ylim=c(0,0.4),xlab=k,
	ylab="",main="Approximate 'density' of binomial mixture",col="red",lwd=2)
	legend('topright',c("Original", "EM Estimate"),lty=1, col=c('black', 'red'), bty='n', cex=1)


	}

	})