@slwu89
Created May 25, 2017 00:36
How to do MLE of the multinomial distribution for categorical data
###############################################################
#
# MLE example with categorical data
#
###############################################################
# make up some 'data'; c(0.1,0.2,0.3,0.4) are the 'real' parameters
data = rmultinom(n = 40,size = 4,prob = c(0.1,0.2,0.3,0.4))
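# (illustrative check) rmultinom returns a 4 x 40 integer matrix:
# each column is one multinomial draw of size 4, with one row per category
dim(data)
data[, 1:3]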
# the log-likelihood of the first sample
dmultinom(x = data[,1],size = 4,prob = c(0.1,0.2,0.3,0.4),log = TRUE)
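# (illustrative) since the 40 draws are independent, the log-likelihood of the whole
# data set is the sum of the per-column log-likelihoods
sum(apply(X = data, MARGIN = 2, FUN = dmultinom, size = 4, prob = c(0.1,0.2,0.3,0.4), log = TRUE))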
# pretend we don't know the real parameters and want to estimate them with MLE
# first, define the function to calculate the negative log-likelihood of the data given a vector of parameters, 'x'
# (negative, because optim and constrOptim minimize by default)
loglike = function(x, data){
  p1 = x[1]; p2 = x[2]; p3 = x[3]; p4 = x[4]
  -sum(apply(X = data, MARGIN = 2, FUN = dmultinom, size = 4, prob = c(p1,p2,p3,p4), log = TRUE))
}
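# (illustrative sanity check) evaluate the negative log-likelihood at the true parameters
# and at the uniform guess; the true parameters will typically give the smaller value
loglike(c(0.1,0.2,0.3,0.4), data)
loglike(c(0.25,0.25,0.25,0.25), data)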
# use numerical optimization to find the MLE parameters
# initial guess is 0.25 for all 4 categories (we assume we know nothing)
# method 1: box-constrained L-BFGS-B optimization for MLE
mle1 = optim(par = c(0.25,0.25,0.25,0.25), fn = loglike, data = data, method = "L-BFGS-B",
             lower = rep(.Machine$double.eps, 4), upper = rep(1, 4))
# MLE values: renormalize so they sum to 1 (dmultinom normalizes prob internally, so the
# optimizer is not forced onto the simplex); the result should be reasonably close to c(0.1,0.2,0.3,0.4)
mle1$par / sum(mle1$par)
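# for comparison: the closed-form MLE of multinomial probabilities is just the
# observed category proportions, which the numerical estimates should match closely
rowSums(data) / sum(data)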
# method 2: Nelder-Mead with constraints (constrOptim) for MLE
# set box constraints so each probability lies between 0 and 1
bounds <- matrix(c(
  0, 1,
  0, 1,
  0, 1,
  0, 1
), ncol = 2, byrow = TRUE)
colnames(bounds) <- c("lower", "upper")
# convert the bounds into the ui matrix and ci vector used by constrOptim
# (the feasible region is ui %*% theta - ci >= 0)
n <- nrow(bounds)
ui <- rbind( diag(n), -diag(n) )
ci <- c( bounds[,1], - bounds[,2] )
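# (illustrative check) constrOptim requires the starting value to lie strictly inside
# the feasible region, i.e. ui %*% theta - ci > 0; verify this for the uniform initial guess
all(ui %*% c(0.25,0.25,0.25,0.25) - ci > 0)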
mle2 = constrOptim(theta = c(0.25,0.25,0.25,0.25), f = loglike, grad = NULL, ui = ui, ci = ci, data = data)
# MLE values: again renormalized; the result should be reasonably close to c(0.1,0.2,0.3,0.4)
mle2$par / sum(mle2$par)
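# (illustrative) compare the two numerical solutions with the closed-form proportions;
# all three rows should agree up to optimizer tolerance
rbind(lbfgsb = mle1$par / sum(mle1$par),
      neldermead = mle2$par / sum(mle2$par),
      closed_form = rowSums(data) / sum(data))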
@tpbilton

Thanks for sharing your code. I thought I'd point out that there is a problem with optimizing the multinomial model this way: since the probabilities must sum to 1, there are only 3 independent parameters, but the optimization procedure above is over 4 parameters, so the model is not identifiable and different parameter values will give the same likelihood value, e.g.

mle2 = optim(par = c(0.5,0.5,0.4,0.1),fn = loglike,data = data,method = "L-BFGS-B",
             lower = c(.Machine$double.eps,.Machine$double.eps,.Machine$double.eps,.Machine$double.eps),upper = c(1,1,1,1))

I was looking for a way to optimize a multinomial model using optim, but I can't think of a sensible way to do it because the constraint that the parameters sum to 1 is not a simple fixed bound on each parameter (the only approach I can think of is EM).
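To illustrate the identifiability issue with the loglike function defined above (a minimal sketch, relying on the fact that dmultinom internally rescales prob to sum to 1): any positive rescaling of a parameter vector gives exactly the same negative log-likelihood, so the 4-parameter optimization has no unique optimum.

loglike(c(0.1,0.2,0.3,0.4), data)
loglike(c(0.2,0.4,0.6,0.8), data) # identical value: prob is rescaled to sum to 1 inside dmultinom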
