mdietze/linear.R

## linear.R
## linear.R -- to accompany Module 4 of
## Methods of Regression
##
## by Leah R Johnson, Stats, VT


## load the ex.mc2 function
source("sampling2.R")

## set inputs (covariates)
x <- seq(-3,2,length=30)

## set regression coefficients alpha=beta0, beta=beta1, quad one is hard coded
alpha <- 10; beta <- 0.1
plot(x, alpha+beta*x-1*x^2, type="l")


## monte-carlo linear fits
ex.mc2(x, alpha, beta, sampf=quadf)


## sampling2.R
## Code provided by:
## Leah R Johnson 30 July 2019
## adapted from code by Robert B Gramacy
## Virginia Tech Department of Statistics
##
##
## see e.g., linear.R for examples of how to use the functions
## contained in this file


## linf:
##
## generate from a standard linear model with covariate
## x, intercept alpha and slope beta -- then fit a linear
## model (ML) to the result.  return the sample, the
## fit and a descriptive name, n is a vector of ones to be
## compatible with logitf

linf <- function(x, alpha, beta)
  {
    epsilon <- rnorm(length(x))
    y <- alpha + beta*x + epsilon
    fit <- lm(y ~ x)
    return(list(y=y, fit=fit, n=rep(1,length(y)), name="linear"))
  }


## loginf:
##
## generate from a log-linear model with covariate
## x, intercept alpha and slope beta -- then fit a linear
## model (ML) to the result.  return the sample, the
## fit and a descriptive name, n is a vector of ones to be
## compatible with logitf

loglinf <- function(x, alpha, beta)
  {
    mu <- exp(alpha + beta*x)
    y <- rpois(length(mu), mu)
    fit <- glm(y ~ x, family="poisson")
    return(list(y=y, fit=fit, n=rep(1,length(y)), name="log-linear"))
  }


## logit:
##
## generate from a binomial-logit model with covariate
## x, intercept alpha and slope beta -- then fit a linear
## model (ML) to the result.  return the sample, the
## fit, the total counts, and a descriptive name

logitf <- function(x, alpha, beta)
  {
    n <- rpois(length(x), lambda=2)+1
    p <- exp(alpha + beta*x)/(1+exp(alpha + beta*x))
    y <- rbinom(length(n), n, prob=p)
    fit <- glm(y/n ~ x, family="binomial", weights=n)
    return(list(y=y, fit=fit, n=n, name="logit"))
  }


## three.ex:
##
## sample (plot) data from a standard linear model, a log-linear
## model and a binomial-logit model, with options to add the
## MLE regression line to the plot (and regression coeffs)

three.ex <- function(x, alpha, beta, infer=FALSE) {

  ## for plotting on a 2x2 grid
  par(mfrow=c(2,2), cex.main=2, cex.lab=2, cex.axis=2)

  ## simple linear model
  lin <- linf(x, alpha, beta)
  plot(x,lin$y, ylab="y", cex=2)

  ## plot ML inference for the linear model
  if(infer) {
    p.lin <- predict(lin$fit, se.fit=TRUE)
    lines(x, p.lin$fit, lwd=2)
    lines(x, p.lin$fit+2*p.lin$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.lin$fit-2*p.lin$se.fit, col=2, lty=2, lwd=2)
    title(paste(lin$name, ": m = a + b*x, (a,b)=(",
                signif(coef(lin$fit)[1],2), ",", signif(coef(lin$fit)[2],2),
                ")", sep=""))
  } else title(lin$name)

  ## simple log-linear model
  loglin <- loglinf(x, alpha, beta)
  plot(x, loglin$y, ylab="y", cex=2)

  ## ML inference for the log-linear model
  if(infer) {
    p.lin <- predict(loglin$fit, type="response", se.fit=TRUE)
    lines(x, p.lin$fit, lwd=2)
    lines(x, p.lin$fit+2*p.lin$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.lin$fit-2*p.lin$se.fit, col=2, lty=2, lwd=2)
    title(paste(loglin$name, ": log(m) = a + b*x, (a,b)=(",
                signif(coef(loglin$fit)[1],2), ",",
                signif(coef(loglin$fit)[2],2), ")", sep=""))
  } else title(loglin$name)

  ## simple logit model
  logit <- logitf(x, alpha, beta)
  plot(x, logit$y/logit$n, type="n", ylab="y")
  text(x, logit$y/logit$n, logit$n, cex=2)

  ## ML inference for the logit model
  if(infer) {
    p.logit <- predict(logit$fit, type="response", se.fit=TRUE)
    lines(x, p.logit$fit, lwd=2)
    lines(x, p.logit$fit+2*p.logit$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.logit$fit-2*p.logit$se.fit, col=2, lty=2, lwd=2)
    title(paste(logit$name, ": log(m/(1-m)) = a + b*x, (a,b)=(",
                signif(coef(logit$fit)[1],2), ",",
                signif(coef(logit$fit)[2],2), ")", sep=""))
  } else title("logit")
}


## ex2.mc:
##
## monte-carlo sampling from the provided sampling function
## (one of the three GLM functions above) with ML fits,
## and finally summarize the estimates with means and
## errorbars


ex.mc <- function(x, alpha, beta, sampf=linf) {

  ## initialization
  i <- 0
  par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
  l <- ab <- NULL

  ## while the user wishes to continue
  while(TRUE) {

    ## sample with the sampling function sampf
    i <- i+1
    samp <- sampf(x, alpha, beta)
    l <- cbind(l, predict(samp$fit, type="response"))

    ## plot the points
    if(any(samp$n != 1)) {
      plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(l)), type="n", ylab="y")
      text(x, samp$y/samp$n, samp$n, cex=2, col=i)
    } else {
      plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(l)), ylab="y")
    }

    ## plot the accumulated regression lines
    matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
    ab <- rbind(ab, coef(samp$fit))
    title(paste(samp$name, ": m = a + b*x, average (a,b)=(",
                signif(mean(ab[,1]),2), ",",
                signif(mean(ab[,2]),2), ")", sep=""))

    ## continue sampling?
    if(readline("press RETURN to continue, q to stop: ") == "q") break
  }

  ## plot the resulting mean regression line and quantiles
  m <- apply(l, 1, mean);
  plot(x, m, type="l", lwd=2, ylab="y")
  title(paste(samp$name, ": predictive mean and interval", sep=""))
  q1 <- apply(l, 1, quantile, 0.05)
  lines(x, q1, lwd=2, col=2)
  q2 <- apply(l, 1, quantile, 0.95)
  lines(x, q2, lwd=2, col=2)
}


## ex.mc:
##
## monte-carlo sampling from the provided sampling function
## (one of the three GLM functions above) with ML fits,
## and finally summarize the estimates with means and
## errorbars

## linf:
##
## generate from a standard linear model with covariate
## x, intercept alpha and slope beta -- then fit a linear
## model (ML) to the result.  return the sample, the
## fit and a descriptive name, n is a vector of ones to be
## compatible with logitf

quadf <- function(x, alpha, beta){
    epsilon <- rnorm(length(x))
    x2<-x^2
    y <- alpha + beta*x - x2 + epsilon
    fit <- lm(y ~ x+x2)
    return(list(y=y, fit=fit, n=rep(1,length(y)), name="quadratic"))
}


ex.mc2 <- function(x, alpha, beta, sampf=linf) {

  ## initialization
  i <- 0
  par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
  l <- ab <- ys <- NULL

  ## while the user wishes to continue
  while(TRUE) {

    ## sample with the sampling function sampf
    i <- i+1
    samp <- sampf(x, alpha, beta)
    l <- cbind(l, predict(samp$fit, type="response"))
    ys<- cbind(ys, samp$y/samp$n)

    ## plot the points
    if(any(samp$n != 1)) {
      plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(ys)), type="n", ylab="y")
      text(x, samp$y/samp$n, samp$n, cex=2, col=i)
    } else {
      plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(ys)), ylab="y")
    }

    ## plot the accumulated regression lines
    matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
    matplot(x, ys, col=1:i, pch=20, add=TRUE)
    ab <- rbind(ab, coef(samp$fit))
    #title(paste(samp$name, ": m = a + b*x, (a,b)=(",
    #            signif((ab[i,1]),2), ",",
    #            signif((ab[i,2]),2), ")", sep=""))

    ## continue sampling?
    if(readline("press RETURN to continue, q to stop: ") == "q") break
  }

  ## plot the resulting mean regression line and quantiles
  m <- apply(l, 1, mean);
  plot(x, m, type="l", lwd=2, ylab="y", ylim=range(as.vector(ys)))
  title(paste(samp$name, ": predictive mean, CI, and interval", sep=""))
  q1 <- apply(l, 1, quantile, 0.025)
  lines(x, q1, lwd=2, col=2)
  q2 <- apply(l, 1, quantile, 0.975)
  lines(x, q2, lwd=2, col=2)

  qp1 <-  apply(ys, 1, quantile, 0.025)
  qp2 <-  apply(ys, 1, quantile, 0.975)
  lines(x, qp2, lwd=2, col=2, lty=2)
  lines(x, qp1, lwd=2, col=2, lty=2)

}
	## linear.R -- to accompany Module 4 of
	## Methods of Regression
	##
	## by Leah R Johnson, Stats, VT


	## load the ex.mc2 function
	source("sampling2.R")

	## set inputs (covariates)
	x <- seq(-3,2,length=30)

	## set regression coefficients alpha=beta0, beta=beta1, quad one is hard coded
	alpha <- 10; beta <- 0.1
	plot(x, alpha+betax-1x^2, type="l")


	## monte-carlo linear fits
	ex.mc2(x, alpha, beta, sampf=quadf)
	## Code provided by:
	## Leah R Johnson 30 July 2019
	## adapted from code by Robert B Gramacy
	## Virginia Tech Department of Statistics
	##
	##
	## see e.g., linear.R for examples of how to use the functions
	## contained in this file


	## linf:
	##
	## generate from a standard linear model with covariate
	## x, intercept alpha and slope beta -- then fit a linear
	## model (ML) to the result. return the sample, the
	## fit and a descriptive name, n is a vector of ones to be
	## compatible with logitf

	linf <- function(x, alpha, beta)
	{
	epsilon <- rnorm(length(x))
	y <- alpha + beta*x + epsilon
	fit <- lm(y ~ x)
	return(list(y=y, fit=fit, n=rep(1,length(y)), name="linear"))
	}


	## loginf:
	##
	## generate from a log-linear model with covariate
	## x, intercept alpha and slope beta -- then fit a linear
	## model (ML) to the result. return the sample, the
	## fit and a descriptive name, n is a vector of ones to be
	## compatible with logitf

	loglinf <- function(x, alpha, beta)
	{
	mu <- exp(alpha + beta*x)
	y <- rpois(length(mu), mu)
	fit <- glm(y ~ x, family="poisson")
	return(list(y=y, fit=fit, n=rep(1,length(y)), name="log-linear"))
	}


	## logit:
	##
	## generate from a binomial-logit model with covariate
	## x, intercept alpha and slope beta -- then fit a linear
	## model (ML) to the result. return the sample, the
	## fit, the total counts, and a descriptive name

	logitf <- function(x, alpha, beta)
	{
	n <- rpois(length(x), lambda=2)+1
	p <- exp(alpha + betax)/(1+exp(alpha + betax))
	y <- rbinom(length(n), n, prob=p)
	fit <- glm(y/n ~ x, family="binomial", weights=n)
	return(list(y=y, fit=fit, n=n, name="logit"))
	}


	## three.ex:
	##
	## sample (plot) data from a standard linear model, a log-linear
	## model and a binomial-logit model, with options to add the
	## MLE regression line to the plot (and regression coeffs)

	three.ex <- function(x, alpha, beta, infer=FALSE) {

	## for plotting on a 2x2 grid
	par(mfrow=c(2,2), cex.main=2, cex.lab=2, cex.axis=2)

	## simple linear model
	lin <- linf(x, alpha, beta)
	plot(x,lin$y, ylab="y", cex=2)

	## plot ML inference for the linear model
	if(infer) {
	p.lin <- predict(lin$fit, se.fit=TRUE)
	lines(x, p.lin$fit, lwd=2)
	lines(x, p.lin$fit+2*p.lin$se.fit, col=2, lty=2, lwd=2)
	lines(x, p.lin$fit-2*p.lin$se.fit, col=2, lty=2, lwd=2)
	title(paste(lin$name, ": m = a + b*x, (a,b)=(",
	signif(coef(lin$fit)[1],2), ",", signif(coef(lin$fit)[2],2),
	")", sep=""))
	} else title(lin$name)

	## simple log-linear model
	loglin <- loglinf(x, alpha, beta)
	plot(x, loglin$y, ylab="y", cex=2)

	## ML inference for the log-linear model
	if(infer) {
	p.lin <- predict(loglin$fit, type="response", se.fit=TRUE)
	lines(x, p.lin$fit, lwd=2)
	lines(x, p.lin$fit+2*p.lin$se.fit, col=2, lty=2, lwd=2)
	lines(x, p.lin$fit-2*p.lin$se.fit, col=2, lty=2, lwd=2)
	title(paste(loglin$name, ": log(m) = a + b*x, (a,b)=(",
	signif(coef(loglin$fit)[1],2), ",",
	signif(coef(loglin$fit)[2],2), ")", sep=""))
	} else title(loglin$name)

	## simple logit model
	logit <- logitf(x, alpha, beta)
	plot(x, logit$y/logit$n, type="n", ylab="y")
	text(x, logit$y/logit$n, logit$n, cex=2)

	## ML inference for the logit model
	if(infer) {
	p.logit <- predict(logit$fit, type="response", se.fit=TRUE)
	lines(x, p.logit$fit, lwd=2)
	lines(x, p.logit$fit+2*p.logit$se.fit, col=2, lty=2, lwd=2)
	lines(x, p.logit$fit-2*p.logit$se.fit, col=2, lty=2, lwd=2)
	title(paste(logit$name, ": log(m/(1-m)) = a + b*x, (a,b)=(",
	signif(coef(logit$fit)[1],2), ",",
	signif(coef(logit$fit)[2],2), ")", sep=""))
	} else title("logit")
	}


	## ex2.mc:
	##
	## monte-carlo sampling from the provided sampling function
	## (one of the three GLM functions above) with ML fits,
	## and finally summarize the estimates with means and
	## errorbars



	ex.mc <- function(x, alpha, beta, sampf=linf) {

	## initialization
	i <- 0
	par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
	l <- ab <- NULL

	## while the user wishes to continue
	while(TRUE) {

	## sample with the sampling function sampf
	i <- i+1
	samp <- sampf(x, alpha, beta)
	l <- cbind(l, predict(samp$fit, type="response"))

	## plot the points
	if(any(samp$n != 1)) {
	plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(l)), type="n", ylab="y")
	text(x, samp$y/samp$n, samp$n, cex=2, col=i)
	} else {
	plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(l)), ylab="y")
	}

	## plot the accumulated regression lines
	matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
	ab <- rbind(ab, coef(samp$fit))
	title(paste(samp$name, ": m = a + b*x, average (a,b)=(",
	signif(mean(ab[,1]),2), ",",
	signif(mean(ab[,2]),2), ")", sep=""))

	## continue sampling?
	if(readline("press RETURN to continue, q to stop: ") == "q") break
	}

	## plot the resulting mean regression line and quantiles
	m <- apply(l, 1, mean);
	plot(x, m, type="l", lwd=2, ylab="y")
	title(paste(samp$name, ": predictive mean and interval", sep=""))
	q1 <- apply(l, 1, quantile, 0.05)
	lines(x, q1, lwd=2, col=2)
	q2 <- apply(l, 1, quantile, 0.95)
	lines(x, q2, lwd=2, col=2)
	}




	## ex.mc:
	##
	## monte-carlo sampling from the provided sampling function
	## (one of the three GLM functions above) with ML fits,
	## and finally summarize the estimates with means and
	## errorbars

	## linf:
	##
	## generate from a standard linear model with covariate
	## x, intercept alpha and slope beta -- then fit a linear
	## model (ML) to the result. return the sample, the
	## fit and a descriptive name, n is a vector of ones to be
	## compatible with logitf

	quadf <- function(x, alpha, beta){
	epsilon <- rnorm(length(x))
	x2<-x^2
	y <- alpha + beta*x - x2 + epsilon
	fit <- lm(y ~ x+x2)
	return(list(y=y, fit=fit, n=rep(1,length(y)), name="quadratic"))
	}




	ex.mc2 <- function(x, alpha, beta, sampf=linf) {

	## initialization
	i <- 0
	par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
	l <- ab <- ys <- NULL

	## while the user wishes to continue
	while(TRUE) {

	## sample with the sampling function sampf
	i <- i+1
	samp <- sampf(x, alpha, beta)
	l <- cbind(l, predict(samp$fit, type="response"))
	ys<- cbind(ys, samp$y/samp$n)

	## plot the points
	if(any(samp$n != 1)) {
	plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(ys)), type="n", ylab="y")
	text(x, samp$y/samp$n, samp$n, cex=2, col=i)
	} else {
	plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(ys)), ylab="y")
	}

	## plot the accumulated regression lines
	matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
	matplot(x, ys, col=1:i, pch=20, add=TRUE)
	ab <- rbind(ab, coef(samp$fit))
	#title(paste(samp$name, ": m = a + b*x, (a,b)=(",
	# signif((ab[i,1]),2), ",",
	# signif((ab[i,2]),2), ")", sep=""))

	## continue sampling?
	if(readline("press RETURN to continue, q to stop: ") == "q") break
	}

	## plot the resulting mean regression line and quantiles
	m <- apply(l, 1, mean);
	plot(x, m, type="l", lwd=2, ylab="y", ylim=range(as.vector(ys)))
	title(paste(samp$name, ": predictive mean, CI, and interval", sep=""))
	q1 <- apply(l, 1, quantile, 0.025)
	lines(x, q1, lwd=2, col=2)
	q2 <- apply(l, 1, quantile, 0.975)
	lines(x, q2, lwd=2, col=2)

	qp1 <- apply(ys, 1, quantile, 0.025)
	qp2 <- apply(ys, 1, quantile, 0.975)
	lines(x, qp2, lwd=2, col=2, lty=2)
	lines(x, qp1, lwd=2, col=2, lty=2)

	}