baogorek/Replicate_Lmer.R

## Replicate_Lmer.R
rm(list = ls())

# Setup: 3 subjects, 5 total measurements, one covariate
df <- data.frame(x1 = c(1,5,2,3,4), subject = c(1,2,3,1,2))
df$subject <- factor(df$subject)
n <- 5

beta <- matrix(c(1.3, 2), ncol=1) # coeffient for x1, a value of 2
u <- matrix(c(-.5, .6, 1.5), ncol=1) # the three random effects for subjects

# Create design matrices Z and X
Z <- model.matrix(~ 0 + factor(subject), data = df)
X <- model.matrix(~ 1 + x1, data = df)

# Complete the Data Generating Process
epsilon <- c(.9, .1, -.2, .3, -.4)
df$y <- X %*% beta + Z %*% u + epsilon
y <- matrix(df$y, ncol = 1) #response vector

### Goal: match estimates from the following lmer output!
library(lme4)
my.lmer <- lmer( y ~ x1 + (1 | subject), data = df, REML = FALSE)
summary(my.lmer)
ranef(my.lmer)

# Below is solution to the linear system with known variance parms
solution <- function(sigma_u, sigma_e){
    V = sigma_u^2 * Z %*% t(Z) + sigma_e^2 * diag(1,n,n)
    solve(t(X) %*% solve(V) %*% X) %*% t(X) %*% solve(V) %*% y
}
beta.hat <- solution(1,1)

log.likelihood <- function(beta.hat, sigma_u, sigma_e){
  V = sigma_u^2 * Z %*% t(Z) + sigma_e^2 * diag(1,n,n)
  resid = y - X %*% beta.hat

  -(n/2)*log(2*pi) + -.5*log(det(V)) + -.5*t(resid) %*% solve(V) %*% resid
}

#### test runs  ####
log.likelihood(beta.hat,1,1)

#assumes beta.hat is in the global environment
neg.partial.log.likelihood <- function(sd.parms){
   -log.likelihood(beta.hat, sd.parms[1], sd.parms[2])
}
neg.partial.log.likelihood(c(1,1))

#default optim performs minimization
sd.parms.hat <- optim(c(1,1), neg.partial.log.likelihood )$par

### The real thing ####
# iterate until...8?

#initial values...change them. How much do they matter?
sd.parms.hat <- c(.5,.2)
beta.hat <- c(2, 1.5)

for( iter in 1:8 ){
  beta.hat <- solution(sd.parms.hat[1], sd.parms.hat[2])
  sd.parms.hat <- optim(sd.parms.hat, neg.partial.log.likelihood )$par

  cat("\n*** Iteration ", iter," ***")
  print(beta.hat)
  cat("sigma_u estimate: ", sd.parms.hat[1], ", sigma_e estimate: ", sd.parms.hat[2], "\n")
  cat("log likelihood: ", log.likelihood(beta.hat, sd.parms.hat[1], sd.parms.hat[2]), "\n")
}

# maximizing the posterior mode of u | Y, X, beta, sigma_u, sigma_u is
# analytically tractible under normality assumptions.
# The closed form solution comes from vector/Matrix calculus.

resid = y - X %*% beta.hat
R = t(Z) %*% Z + (sd.parms.hat[2]^2/sd.parms.hat[1]^2)*diag(1,3,3)
t(resid) %*% Z %*% solve(R)

#### Towards a "hat" matrix P such that Py = y.hat, and a concept of effective df
V = sd.parms.hat[1]^2 * Z %*% t(Z) + sd.parms.hat[2]^2 * diag(1,n,n)
V.inv = solve(V)
Q.x = solve(t(X) %*% V.inv %*% X) %*% t(X) %*% V.inv
Q.z = solve(R) %*% t(Z) %*% (diag(1,n,n) - X %*% Q.x)

# check
fixef(my.lmer)
ranef(my.lmer)
Q.x %*% y # fixed effects estimate
Q.z %*% y # random effects estimates

P.x = X %*% Q.x
P.z = Z %*% Q.z

P = P.x +  P.z
rownames(P) <- NULL

# P is the "hat matrix," but let's make sure
P %*% y
my.lmer@resid
resid = y - P%*%y

# How many effective degrees of freedom does our model have?
trace  <- function(X) sum( diag(1,nrow(X),nrow(X)) * X)

# Model degrees of freedom
trace(P)

#degrees of freedom from the fixed effects
trace(P.x)

#degrees of freedom from the random effects
trace(P.z)

#estimated error degrees of freedom
5 - trace(P)

## Finally, let's compute the GLS variance covariance matrix of the fixed effects
solve(t(X) %*% V.inv %*% X)
vcov(my.lmer)
	rm(list = ls())

	# Setup: 3 subjects, 5 total measurements, one covariate
	df <- data.frame(x1 = c(1,5,2,3,4), subject = c(1,2,3,1,2))
	df$subject <- factor(df$subject)
	n <- 5

	beta <- matrix(c(1.3, 2), ncol=1) # coeffient for x1, a value of 2
	u <- matrix(c(-.5, .6, 1.5), ncol=1) # the three random effects for subjects

	# Create design matrices Z and X
	Z <- model.matrix(~ 0 + factor(subject), data = df)
	X <- model.matrix(~ 1 + x1, data = df)

	# Complete the Data Generating Process
	epsilon <- c(.9, .1, -.2, .3, -.4)
	df$y <- X %% beta + Z %% u + epsilon
	y <- matrix(df$y, ncol = 1) #response vector

	### Goal: match estimates from the following lmer output!
	library(lme4)
	my.lmer <- lmer( y ~ x1 + (1 \| subject), data = df, REML = FALSE)
	summary(my.lmer)
	ranef(my.lmer)

	# Below is solution to the linear system with known variance parms
	solution <- function(sigma_u, sigma_e){
	V = sigma_u^2 * Z %% t(Z) + sigma_e^2 diag(1,n,n)
	solve(t(X) %% solve(V) %% X) %% t(X) %% solve(V) %*% y
	}
	beta.hat <- solution(1,1)

	log.likelihood <- function(beta.hat, sigma_u, sigma_e){
	V = sigma_u^2 * Z %% t(Z) + sigma_e^2 diag(1,n,n)
	resid = y - X %*% beta.hat

	-(n/2)log(2pi) + -.5log(det(V)) + -.5t(resid) %% solve(V) %% resid
	}

	#### test runs ####
	log.likelihood(beta.hat,1,1)

	#assumes beta.hat is in the global environment
	neg.partial.log.likelihood <- function(sd.parms){
	-log.likelihood(beta.hat, sd.parms[1], sd.parms[2])
	}
	neg.partial.log.likelihood(c(1,1))

	#default optim performs minimization
	sd.parms.hat <- optim(c(1,1), neg.partial.log.likelihood )$par

	### The real thing ####
	# iterate until...8?

	#initial values...change them. How much do they matter?
	sd.parms.hat <- c(.5,.2)
	beta.hat <- c(2, 1.5)

	for( iter in 1:8 ){
	beta.hat <- solution(sd.parms.hat[1], sd.parms.hat[2])
	sd.parms.hat <- optim(sd.parms.hat, neg.partial.log.likelihood )$par

	cat("\n* Iteration ", iter," *")
	print(beta.hat)
	cat("sigma_u estimate: ", sd.parms.hat[1], ", sigma_e estimate: ", sd.parms.hat[2], "\n")
	cat("log likelihood: ", log.likelihood(beta.hat, sd.parms.hat[1], sd.parms.hat[2]), "\n")
	}

	# maximizing the posterior mode of u \| Y, X, beta, sigma_u, sigma_u is
	# analytically tractible under normality assumptions.
	# The closed form solution comes from vector/Matrix calculus.

	resid = y - X %*% beta.hat
	R = t(Z) %% Z + (sd.parms.hat[2]^2/sd.parms.hat[1]^2)diag(1,3,3)
	t(resid) %% Z %% solve(R)

	#### Towards a "hat" matrix P such that Py = y.hat, and a concept of effective df
	V = sd.parms.hat[1]^2 * Z %% t(Z) + sd.parms.hat[2]^2 diag(1,n,n)
	V.inv = solve(V)
	Q.x = solve(t(X) %% V.inv %% X) %% t(X) %% V.inv
	Q.z = solve(R) %% t(Z) %% (diag(1,n,n) - X %*% Q.x)

	# check
	fixef(my.lmer)
	ranef(my.lmer)
	Q.x %*% y # fixed effects estimate
	Q.z %*% y # random effects estimates

	P.x = X %*% Q.x
	P.z = Z %*% Q.z

	P = P.x + P.z
	rownames(P) <- NULL

	# P is the "hat matrix," but let's make sure
	P %*% y
	my.lmer@resid
	resid = y - P%*%y

	# How many effective degrees of freedom does our model have?
	trace <- function(X) sum( diag(1,nrow(X),nrow(X)) * X)

	# Model degrees of freedom
	trace(P)

	#degrees of freedom from the fixed effects
	trace(P.x)

	#degrees of freedom from the random effects
	trace(P.z)

	#estimated error degrees of freedom
	5 - trace(P)

	## Finally, let's compute the GLS variance covariance matrix of the fixed effects
	solve(t(X) %% V.inv %% X)
	vcov(my.lmer)