ammaraziz/mle_func

## mle_func
mle <- function(data,start=NULL,eps=10**(-9),logit=FALSE,cov=NULL){
  # INPUT:
  # data: data matrix with
  # 1) observed outcome D (0: no; 1: yes)
  # 2) observed types at risk for (0: no; 1: yes)
  # start: starting vector for the algorithm
  # eps: accuracy parameter to stop the algorithm
  # logit: logical to indicate whether the logit model is used
  # (i.e. correct for a covariate)
  # cov: covariate vector to correct for (only if logit=TRUE)
  #
  # OUTPUT:
  # mle: vector with estimated parameter values
  # results: table with results (nr HPV+, MLE, 95% CI, nr lesions, AF)
  # cov: p-value for the slope of the logit-model (only if logit=TRUE)
  K <- length(data[1,-1]); n <- length(data[,1])

  if (is.null(start)){
    start <- colSums(data[,-1])/n; start[start==0] <- 0.01; start[start==1] <- 0.99
    if (logit){
      start <- c(log(start/(1-start)),0)
    }
  }

  est <- as.matrix(start)
  ll <- numeric(0)
  m <- 1; diff1 <- diff2 <- 1

  while (max(abs(diff1),abs(diff2))>eps){
    ll.m <- log_lik(est[,m],data,logit,cov)
    if (((m>1)&&(ll.m$ll<ll[m-1]))||(ll.m$ll=="NaN")){
      x0 <- est[,m-1]
      ll.0 <- log_lik(x0,data,logit,cov)
      ll0 <-ll.0$ll
      dir <- ll.0$H.inv%*%ll.0$g
      est.n <- line_search(x0,ll0,dir,data,logit,cov)
      ll.m <- log_lik(est.n,data,logit,cov)
      est <- cbind(est[,-m],est.n)
      m <- m-1
    } else {
      ll <- c(ll,ll.m$ll)
      est <- cbind(est,est[,m]-ll.m$H.inv%*%ll.m$g)
    }

    if (logit){
      est[est[,m+1]<(-15),m+1] <- -15; est[est[,m+1]>10,m+1] <- 10
      diff1 <- 1/(1+exp(-est[,m+1]))-1/(1+exp(-est[,m]));
    } else {
      est[est[,m+1]<=eps*10**(-3),m+1] <- 10**(-5)
      est[est[,m+1]>=1-eps*10**(-3),m+1] <- 1-10**(-5)
      diff1 <- est[,m+1]-est[,m]
    }
    if (m>1){ diff2 <- ll[m]-ll[m-1] }
    m <- m+1
  }

  ll.m <- log_lik(est[,m-1],data,logit,cov)
  se <- sqrt(-diag(ll.m$H.inv))

  if (logit) { mle <- c(est[1:K,m-1]-mean(cov)*est[K+1,m-1],est[K+1,m-1])
  } else { mle <- est[,m-1] }

  if (logit){
    a <- mle[1:K]; b <- mle[K+1]; x <- mean(cov)
    mle.C <- a+b*x
    y1 <- mle.C-qnorm(0.975)*se[-(K+1)]; y2 <- mle.C+qnorm(0.975)*se[-(K+1)]
    CI <- cbind(1/(1+exp(-y1)),1/(1+exp(-y2)))
    mle.C <- 1/(1+exp(-mle.C))
  } else {
    CI <- cbind(mle-qnorm(0.975)*se,mle+qnorm(0.975)*se)
  }

  CI[CI[,1]<0,1] <- 10**(-5); CI[CI[,2]>1,2] <- 1-10**(-5)
  Wald.score <- abs(mle-rep(0,K+logit))/se
  p.val <- 2*(1-pnorm(Wald.score))

  if (logit) { nr <- colSums(data[,-1])*mle.C
  } else { nr <- colSums(data[,-1])*mle }
  AF <- nr/sum(nr)

  if (logit){
    out <- cbind(colSums(data[,-1]),round(mle.C,3),round(CI,3),round(nr,1),round(AF,3))
    dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF")
    list(mle=mle,results=out,cov=round(p.val[K+1],3))
  } else {
    out <- cbind(colSums(data[,-1]),round(mle,3),round(CI,3),round(nr,1),round(AF,3))
    dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF")
    list(mle=mle,results=out)
  }
}

log_lik <- function(par,data,logit=FALSE,cov=NULL){
  # INPUT:
  # par: parameter vector to determine the log-likelihood at
  # data / logit / cov: as in mle-function
  #
  # OUTPUT:
  # ll: value of the log-likelihood at par
  # g: gradient vector with partial derivative of log-likelihood
  # H: Hessian matrix with the second partial derivative of log-likelihood
  # H.inv: inverse of H
  K <- length(data[1,-1]); N <- length(data[,1])
  d <- data[,1]; Y <- data[,-1]
  if (logit){ X <- cov-mean(cov) }
  if (logit){
    pred <- matrix(rep(par[1:K],N),byrow=TRUE,ncol=K)+par[K+1]*X
    pi <- 1/(1+exp(-pred))
  } else {
    pi <- matrix(rep(par,N),byrow=TRUE,ncol=K)
  }
  prod <- apply((1-pi)**Y,1,prod)
  if (logit){
    sum.1 <- apply(1-(1-pi)**Y,1,sum)
    sum.2 <- apply((pi**Y)*(1-pi**Y),1,sum)
  }
  ll <- sum(d*log(1-prod)+(1-d)*log(prod))
  g <- numeric(K+logit); H <- matrix(0,ncol=K+logit,nrow=K+logit)
  for (k in 1:K){
    ind.k <- which(Y[,k]==1)
    if (logit){
      g[k] <- sum((d*pi[,k]/(1-prod)-pi[,k])[ind.k])
      H[k,k] <- sum((d*pi[,k]*(1-pi[,k]-prod)/(1-prod)**2-pi[,k]*(1-pi[,k]))[ind.k])
      H[k,K+1] <- H[K+1,k] <- sum((X*(d*pi[,k]*((1-pi[,k])*(1-prod)-prod*sum.1)/(1-prod)**2
                                      -pi[,k]*(1-pi[,k])))[ind.k])
    } else {
      g[k] <- sum((d*prod/((1-pi[,k])*(1-prod))-(1-d)/(1-pi[,k]))[ind.k])
      H[k,k] <- -sum((d*prod**2/((1-prod)**2*(1-pi[,k])**2)+(1-d)/(1-pi[,k])**2)[ind.k])
    }
    for (l in min((k+1),K):K){
      if (k!=l){
        ind.kl<- which((Y[,k]==1)&(Y[,l]==1))
        if (logit){
          H[k,l] <- H[l,k] <- sum((d*pi[,k]*pi[,l]*prod/(1-prod)**2)[ind.kl])
        } else {
          H[k,l] <- H[l,k] <- -sum((d*prod**2/((1-prod)**2*(1-pi[,k])*(1-pi[,l])))[ind.kl])
        }
      }
    }
  }
  if (logit){
    g[K+1] <- sum(X*(d*sum.1/(1-prod)-sum.1))
    H[K+1,K+1] <- sum(X**2*(d*((1-prod)*sum.2-prod*sum.1**2)/(1-prod)**2-sum.2))
  }
  if (logit){
    # invert H via block-inversion for case of "singularity" due to precision
    A <- H[1:K,1:K]; B <- H[K+1,1:K]; C <- H[1:K,K+1]; D <- H[K+1,K+1]
    A.inv <- solve(A); A.inv_B <- A.inv%*%B; C_A.inv <- C%*%A.inv
    L1 <- A.inv + (A.inv_B%*%L4)%*%C_A.inv; L2 <- -A.inv_B%*%L4
    L3 <- -L4%*%C_A.inv; L4 <- 1/(D-C%*%A.inv%*%B)
    H.inv <- rbind(cbind(L1,L2),cbind(L3,L4))
  } else {
    H.inv <- solve(H)
  }
  list(ll=ll,g=g,H=H,H.inv=H.inv)
}

line_search <- function(x0, ll0, dir, data, logit=FALSE, cov=NULL) {
  # INPUT:
  # x0: current estimate to start line search algorithm from
  # ll0: log-likelihood value at x0
  # dir: direction for line search (H**(-1)*g)
  # data / logit / cov: as in mle-function
  #
  # OUTPUT:
  # x.n: new estimate based on line search
  lambda <- 1
  ll.n <- log_lik(x0,data,logit,cov)
  # determine the correction direction
  x.d <- x0-0.01*dir;
  if (logit) { x.d[x.d>10] <- 10
  x.d[-x.d>15] <- -15
  } else { x.d[x.d<=0] <- 10**(-5)
  x.d[x.d>=1] <- 1-10**(-5) }
  ll.d <- log_lik(x.d,data,logit,cov)
  if (ll.d$ll<ll.n$ll) {dir <- -dir}
  ll.n$ll <- ll.n$ll-1
  while(ll.n$ll<ll0){
    lambda <- lambda/2
    x.n <- x0-lambda*dir;
    if (logit){ x.n[x.n>10] <- 10
    x.n[-x.n>15] <- -15
    } else { x.n[x.n<=0] <- 10**(-5)
    x.n[x.n>=1] <- 1-10**(-5) }
    ll.n <- log_lik(x.n,data,logit,cov)}
  return(x.n)
}
	mle <- function(data,start=NULL,eps=10**(-9),logit=FALSE,cov=NULL){
	# INPUT:
	# data: data matrix with
	# 1) observed outcome D (0: no; 1: yes)
	# 2) observed types at risk for (0: no; 1: yes)
	# start: starting vector for the algorithm
	# eps: accuracy parameter to stop the algorithm
	# logit: logical to indicate whether the logit model is used
	# (i.e. correct for a covariate)
	# cov: covariate vector to correct for (only if logit=TRUE)
	#
	# OUTPUT:
	# mle: vector with estimated parameter values
	# results: table with results (nr HPV+, MLE, 95% CI, nr lesions, AF)
	# cov: p-value for the slope of the logit-model (only if logit=TRUE)
	K <- length(data[1,-1]); n <- length(data[,1])

	if (is.null(start)){
	start <- colSums(data[,-1])/n; start[start==0] <- 0.01; start[start==1] <- 0.99
	if (logit){
	start <- c(log(start/(1-start)),0)
	}
	}

	est <- as.matrix(start)
	ll <- numeric(0)
	m <- 1; diff1 <- diff2 <- 1

	while (max(abs(diff1),abs(diff2))>eps){
	ll.m <- log_lik(est[,m],data,logit,cov)
	if (((m>1)&&(ll.m$ll<ll[m-1]))\|\|(ll.m$ll=="NaN")){
	x0 <- est[,m-1]
	ll.0 <- log_lik(x0,data,logit,cov)
	ll0 <-ll.0$ll
	dir <- ll.0$H.inv%*%ll.0$g
	est.n <- line_search(x0,ll0,dir,data,logit,cov)
	ll.m <- log_lik(est.n,data,logit,cov)
	est <- cbind(est[,-m],est.n)
	m <- m-1
	} else {
	ll <- c(ll,ll.m$ll)
	est <- cbind(est,est[,m]-ll.m$H.inv%*%ll.m$g)
	}

	if (logit){
	est[est[,m+1]<(-15),m+1] <- -15; est[est[,m+1]>10,m+1] <- 10
	diff1 <- 1/(1+exp(-est[,m+1]))-1/(1+exp(-est[,m]));
	} else {
	est[est[,m+1]<=eps10(-3),m+1] <- 10*(-5)
	est[est[,m+1]>=1-eps10(-3),m+1] <- 1-10*(-5)
	diff1 <- est[,m+1]-est[,m]
	}
	if (m>1){ diff2 <- ll[m]-ll[m-1] }
	m <- m+1
	}

	ll.m <- log_lik(est[,m-1],data,logit,cov)
	se <- sqrt(-diag(ll.m$H.inv))

	if (logit) { mle <- c(est[1:K,m-1]-mean(cov)*est[K+1,m-1],est[K+1,m-1])
	} else { mle <- est[,m-1] }

	if (logit){
	a <- mle[1:K]; b <- mle[K+1]; x <- mean(cov)
	mle.C <- a+b*x
	y1 <- mle.C-qnorm(0.975)se[-(K+1)]; y2 <- mle.C+qnorm(0.975)se[-(K+1)]
	CI <- cbind(1/(1+exp(-y1)),1/(1+exp(-y2)))
	mle.C <- 1/(1+exp(-mle.C))
	} else {
	CI <- cbind(mle-qnorm(0.975)se,mle+qnorm(0.975)se)
	}

	CI[CI[,1]<0,1] <- 10(-5); CI[CI[,2]>1,2] <- 1-10(-5)
	Wald.score <- abs(mle-rep(0,K+logit))/se
	p.val <- 2*(1-pnorm(Wald.score))

	if (logit) { nr <- colSums(data[,-1])*mle.C
	} else { nr <- colSums(data[,-1])*mle }
	AF <- nr/sum(nr)

	if (logit){
	out <- cbind(colSums(data[,-1]),round(mle.C,3),round(CI,3),round(nr,1),round(AF,3))
	dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF")
	list(mle=mle,results=out,cov=round(p.val[K+1],3))
	} else {
	out <- cbind(colSums(data[,-1]),round(mle,3),round(CI,3),round(nr,1),round(AF,3))
	dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF")
	list(mle=mle,results=out)
	}
	}

	log_lik <- function(par,data,logit=FALSE,cov=NULL){
	# INPUT:
	# par: parameter vector to determine the log-likelihood at
	# data / logit / cov: as in mle-function
	#
	# OUTPUT:
	# ll: value of the log-likelihood at par
	# g: gradient vector with partial derivative of log-likelihood
	# H: Hessian matrix with the second partial derivative of log-likelihood
	# H.inv: inverse of H
	K <- length(data[1,-1]); N <- length(data[,1])
	d <- data[,1]; Y <- data[,-1]
	if (logit){ X <- cov-mean(cov) }
	if (logit){
	pred <- matrix(rep(par[1:K],N),byrow=TRUE,ncol=K)+par[K+1]*X
	pi <- 1/(1+exp(-pred))
	} else {
	pi <- matrix(rep(par,N),byrow=TRUE,ncol=K)
	}
	prod <- apply((1-pi)**Y,1,prod)
	if (logit){
	sum.1 <- apply(1-(1-pi)**Y,1,sum)
	sum.2 <- apply((pi*Y)(1-pi**Y),1,sum)
	}
	ll <- sum(dlog(1-prod)+(1-d)log(prod))
	g <- numeric(K+logit); H <- matrix(0,ncol=K+logit,nrow=K+logit)
	for (k in 1:K){
	ind.k <- which(Y[,k]==1)
	if (logit){
	g[k] <- sum((d*pi[,k]/(1-prod)-pi[,k])[ind.k])
	H[k,k] <- sum((dpi[,k](1-pi[,k]-prod)/(1-prod)*2-pi[,k](1-pi[,k]))[ind.k])
	H[k,K+1] <- H[K+1,k] <- sum((X(dpi[,k]((1-pi[,k])(1-prod)-prodsum.1)/(1-prod)*2
	-pi[,k]*(1-pi[,k])))[ind.k])
	} else {
	g[k] <- sum((dprod/((1-pi[,k])(1-prod))-(1-d)/(1-pi[,k]))[ind.k])
	H[k,k] <- -sum((dprod2/((1-prod)2(1-pi[,k])2)+(1-d)/(1-pi[,k])2)[ind.k])
	}
	for (l in min((k+1),K):K){
	if (k!=l){
	ind.kl<- which((Y[,k]==1)&(Y[,l]==1))
	if (logit){
	H[k,l] <- H[l,k] <- sum((dpi[,k]pi[,l]prod/(1-prod)*2)[ind.kl])
	} else {
	H[k,l] <- H[l,k] <- -sum((dprod2/((1-prod)2(1-pi[,k])*(1-pi[,l])))[ind.kl])
	}
	}
	}
	}
	if (logit){
	g[K+1] <- sum(X(dsum.1/(1-prod)-sum.1))
	H[K+1,K+1] <- sum(X*2(d((1-prod)sum.2-prodsum.12)/(1-prod)*2-sum.2))
	}
	if (logit){
	# invert H via block-inversion for case of "singularity" due to precision
	A <- H[1:K,1:K]; B <- H[K+1,1:K]; C <- H[1:K,K+1]; D <- H[K+1,K+1]
	A.inv <- solve(A); A.inv_B <- A.inv%%B; C_A.inv <- C%%A.inv
	L1 <- A.inv + (A.inv_B%%L4)%%C_A.inv; L2 <- -A.inv_B%*%L4
	L3 <- -L4%%C_A.inv; L4 <- 1/(D-C%%A.inv%*%B)
	H.inv <- rbind(cbind(L1,L2),cbind(L3,L4))
	} else {
	H.inv <- solve(H)
	}
	list(ll=ll,g=g,H=H,H.inv=H.inv)
	}

	line_search <- function(x0, ll0, dir, data, logit=FALSE, cov=NULL) {
	# INPUT:
	# x0: current estimate to start line search algorithm from
	# ll0: log-likelihood value at x0
	# dir: direction for line search (H*(-1)g)
	# data / logit / cov: as in mle-function
	#
	# OUTPUT:
	# x.n: new estimate based on line search
	lambda <- 1
	ll.n <- log_lik(x0,data,logit,cov)
	# determine the correction direction
	x.d <- x0-0.01*dir;
	if (logit) { x.d[x.d>10] <- 10
	x.d[-x.d>15] <- -15
	} else { x.d[x.d<=0] <- 10**(-5)
	x.d[x.d>=1] <- 1-10**(-5) }
	ll.d <- log_lik(x.d,data,logit,cov)
	if (ll.d$ll<ll.n$ll) {dir <- -dir}
	ll.n$ll <- ll.n$ll-1
	while(ll.n$ll<ll0){
	lambda <- lambda/2
	x.n <- x0-lambda*dir;
	if (logit){ x.n[x.n>10] <- 10
	x.n[-x.n>15] <- -15
	} else { x.n[x.n<=0] <- 10**(-5)
	x.n[x.n>=1] <- 1-10**(-5) }
	ll.n <- log_lik(x.n,data,logit,cov)}
	return(x.n)
	}