Gist by @mdietze, created August 1, 2019 11:57
NEFI2019 Course: Monte Carlo credible and predictive interval simulation
## linear.R -- to accompany Module 4 of
## Methods of Regression
##
## by Leah R Johnson, Stats, VT
## load the sampling functions, including ex.mc2
source("sampling2.R")
## set inputs (covariates)
x <- seq(-3, 2, length=30)
## set regression coefficients alpha=beta0, beta=beta1;
## the quadratic coefficient is hard-coded to -1 in quadf
alpha <- 10; beta <- 0.1
## plot the true mean curve
plot(x, alpha + beta*x - 1*x^2, type="l")
## Monte Carlo quadratic fits: press RETURN to add replicates, q to stop
ex.mc2(x, alpha, beta, sampf=quadf)
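## Not in the original script: the same Monte Carlo drivers can be
## pointed at the other sampling functions in sampling2.R. The
## coefficient values below are illustrative choices, picked so each
## likelihood produces data on a sensible scale.
## ex.mc(x, alpha, beta, sampf=linf)    # simple linear model
## ex.mc(x, 1, 0.5, sampf=loglinf)      # Poisson counts, log link
## ex.mc(x, 0, 1, sampf=logitf)         # binomial proportions, logit link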
## sampling2.R
##
## Code provided by:
## Leah R Johnson, 30 July 2019
## adapted from code by Robert B Gramacy
## Virginia Tech Department of Statistics
##
## see, e.g., linear.R for examples of how to use the functions
## contained in this file
## linf:
##
## generate from a standard linear model with covariate
## x, intercept alpha and slope beta -- then fit a linear
## model (ML) to the result. return the sample, the
## fit, and a descriptive name; n is a vector of ones for
## compatibility with logitf
linf <- function(x, alpha, beta)
{
  epsilon <- rnorm(length(x))
  y <- alpha + beta*x + epsilon
  fit <- lm(y ~ x)
  return(list(y=y, fit=fit, n=rep(1,length(y)), name="linear"))
}
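## Example (not in the original file; the seed and inputs are
## illustrative): with many observations the ML fit should recover
## alpha and beta up to sampling noise.
##   set.seed(42)
##   coef(linf(seq(-3, 2, length=100), 10, 0.1)$fit)
##   ## (Intercept) near 10, slope near 0.1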
## loglinf:
##
## generate from a log-linear (Poisson) model with covariate
## x, intercept alpha and slope beta -- then fit a Poisson
## GLM (ML) to the result. return the sample, the
## fit, and a descriptive name; n is a vector of ones for
## compatibility with logitf
loglinf <- function(x, alpha, beta)
{
  mu <- exp(alpha + beta*x)
  y <- rpois(length(mu), mu)
  fit <- glm(y ~ x, family="poisson")
  return(list(y=y, fit=fit, n=rep(1,length(y)), name="log-linear"))
}
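## Note (not in the original file; illustrative values): the Poisson
## GLM coefficients are on the log scale, so predict(type="response")
## exponentiates the linear predictor -- this is what ex.mc and ex.mc2
## rely on when they collect fitted curves.
##   set.seed(1)
##   ll <- loglinf(seq(-3, 2, length=30), 1, 0.5)
##   all.equal(predict(ll$fit, type="response"),
##             exp(predict(ll$fit, type="link")))   # TRUE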
## logitf:
##
## generate from a binomial-logit model with covariate
## x, intercept alpha and slope beta -- then fit a binomial
## GLM (ML) to the result. return the sample, the
## fit, the total counts n, and a descriptive name
logitf <- function(x, alpha, beta)
{
  ## binomial sample sizes (at least 1 trial per observation)
  n <- rpois(length(x), lambda=2)+1
  p <- exp(alpha + beta*x)/(1+exp(alpha + beta*x))
  y <- rbinom(length(n), n, prob=p)
  ## proportion response with weights = number of trials
  fit <- glm(y/n ~ x, family="binomial", weights=n)
  return(list(y=y, fit=fit, n=n, name="logit"))
}
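## Note (not in the original file): the proportion-plus-weights call
## above is equivalent to the two-column successes/failures form of the
## binomial GLM, which gives the same coefficient estimates:
##   glm(cbind(y, n - y) ~ x, family="binomial")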
## three.ex:
##
## sample (and plot) data from a standard linear model, a log-linear
## model and a binomial-logit model, with an option to add the
## MLE regression line (and regression coefficients) to each plot
three.ex <- function(x, alpha, beta, infer=FALSE) {
  ## for plotting on a 2x2 grid
  par(mfrow=c(2,2), cex.main=2, cex.lab=2, cex.axis=2)

  ## simple linear model
  lin <- linf(x, alpha, beta)
  plot(x, lin$y, ylab="y", cex=2)
  ## plot ML inference for the linear model
  if(infer) {
    p.lin <- predict(lin$fit, se.fit=TRUE)
    lines(x, p.lin$fit, lwd=2)
    lines(x, p.lin$fit+2*p.lin$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.lin$fit-2*p.lin$se.fit, col=2, lty=2, lwd=2)
    title(paste(lin$name, ": m = a + b*x, (a,b)=(",
                signif(coef(lin$fit)[1],2), ",", signif(coef(lin$fit)[2],2),
                ")", sep=""))
  } else title(lin$name)

  ## simple log-linear model
  loglin <- loglinf(x, alpha, beta)
  plot(x, loglin$y, ylab="y", cex=2)
  ## ML inference for the log-linear model
  if(infer) {
    p.loglin <- predict(loglin$fit, type="response", se.fit=TRUE)
    lines(x, p.loglin$fit, lwd=2)
    lines(x, p.loglin$fit+2*p.loglin$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.loglin$fit-2*p.loglin$se.fit, col=2, lty=2, lwd=2)
    title(paste(loglin$name, ": log(m) = a + b*x, (a,b)=(",
                signif(coef(loglin$fit)[1],2), ",",
                signif(coef(loglin$fit)[2],2), ")", sep=""))
  } else title(loglin$name)

  ## simple logit model
  logit <- logitf(x, alpha, beta)
  plot(x, logit$y/logit$n, type="n", ylab="y")
  text(x, logit$y/logit$n, logit$n, cex=2)
  ## ML inference for the logit model
  if(infer) {
    p.logit <- predict(logit$fit, type="response", se.fit=TRUE)
    lines(x, p.logit$fit, lwd=2)
    lines(x, p.logit$fit+2*p.logit$se.fit, col=2, lty=2, lwd=2)
    lines(x, p.logit$fit-2*p.logit$se.fit, col=2, lty=2, lwd=2)
    title(paste(logit$name, ": log(m/(1-m)) = a + b*x, (a,b)=(",
                signif(coef(logit$fit)[1],2), ",",
                signif(coef(logit$fit)[2],2), ")", sep=""))
  } else title("logit")
}
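## Example (not in the original file): the coefficients here are
## illustrative, chosen so all three likelihoods give data on a
## reasonable scale (alpha=0, beta=1 keeps the logit success
## probabilities away from 0 and 1 over x in [-3, 2]).
##   three.ex(seq(-3, 2, length=30), 0, 1, infer=TRUE)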
## ex.mc:
##
## Monte Carlo sampling from the provided sampling function
## (one of the GLM sampling functions above) with ML fits,
## then summarize the fitted curves with their mean and
## 5%/95% quantile lines
ex.mc <- function(x, alpha, beta, sampf=linf) {
  ## initialization
  i <- 0
  par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
  l <- ab <- NULL

  ## while the user wishes to continue
  while(TRUE) {
    ## sample with the sampling function sampf
    i <- i+1
    samp <- sampf(x, alpha, beta)
    l <- cbind(l, predict(samp$fit, type="response"))

    ## plot the points
    if(any(samp$n != 1)) {
      plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(l)), type="n", ylab="y")
      text(x, samp$y/samp$n, samp$n, cex=2, col=i)
    } else {
      plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(l)), ylab="y")
    }

    ## plot the accumulated regression lines
    matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
    ab <- rbind(ab, coef(samp$fit))
    title(paste(samp$name, ": m = a + b*x, average (a,b)=(",
                signif(mean(ab[,1]),2), ",",
                signif(mean(ab[,2]),2), ")", sep=""))

    ## continue sampling?
    if(readline("press RETURN to continue, q to stop: ") == "q") break
  }

  ## plot the resulting mean regression line and 5%/95% quantiles
  m <- apply(l, 1, mean)
  plot(x, m, type="l", lwd=2, ylab="y")
  title(paste(samp$name, ": predictive mean and interval", sep=""))
  q1 <- apply(l, 1, quantile, 0.05)
  lines(x, q1, lwd=2, col=2)
  q2 <- apply(l, 1, quantile, 0.95)
  lines(x, q2, lwd=2, col=2)
}
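## Note (not in the original file): the red band drawn by ex.mc is a
## Monte Carlo 90% interval on the fitted mean curve (the columns of
## l are fitted curves, one per replicate); ex.mc2 below also keeps the
## raw samples in ys, so it can add a predictive band for new
## observations. Example call, mirroring linear.R:
##   ex.mc(seq(-3, 2, length=30), 10, 0.1, sampf=linf)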
## quadf:
##
## generate from a quadratic model with covariate x, intercept
## alpha, linear coefficient beta, and a hard-coded quadratic
## coefficient of -1 -- then fit a quadratic model (ML) to the
## result. return the sample, the fit, and a descriptive name;
## n is a vector of ones for compatibility with logitf
quadf <- function(x, alpha, beta){
  epsilon <- rnorm(length(x))
  x2 <- x^2
  y <- alpha + beta*x - x2 + epsilon
  fit <- lm(y ~ x + x2)
  return(list(y=y, fit=fit, n=rep(1,length(y)), name="quadratic"))
}
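## Example (not in the original file; illustrative seed): overlay one
## quadf sample and its fitted curve on the true mean curve that
## linear.R plots.
##   set.seed(7)
##   xx <- seq(-3, 2, length=30)
##   qs <- quadf(xx, 10, 0.1)
##   plot(xx, 10 + 0.1*xx - xx^2, type="l", ylab="y")
##   points(xx, qs$y)
##   lines(xx, predict(qs$fit), col=2)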
## ex.mc2:
##
## like ex.mc, but also accumulates the raw samples (ys) so that the
## final plot shows both a 95% Monte Carlo interval on the fitted mean
## curve (solid red) and a 95% predictive interval from the sampled
## data themselves (dashed red)
ex.mc2 <- function(x, alpha, beta, sampf=linf) {
  ## initialization
  i <- 0
  par(mfrow=c(1,1), cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
  l <- ab <- ys <- NULL

  ## while the user wishes to continue
  while(TRUE) {
    ## sample with the sampling function sampf
    i <- i+1
    samp <- sampf(x, alpha, beta)
    l <- cbind(l, predict(samp$fit, type="response"))
    ys <- cbind(ys, samp$y/samp$n)

    ## plot the points
    if(any(samp$n != 1)) {
      plot(x, samp$y/samp$n, cex=2, ylim=range(as.vector(ys)), type="n", ylab="y")
      text(x, samp$y/samp$n, samp$n, cex=2, col=i)
    } else {
      plot(x, samp$y/samp$n, cex=2, col=i, ylim=range(as.vector(ys)), ylab="y")
    }

    ## plot the accumulated regression lines and sampled points
    matplot(x, l, lwd=2, col=1:i, type="l", lty=1, add=TRUE, ylab="y")
    matplot(x, ys, col=1:i, pch=20, add=TRUE)
    ab <- rbind(ab, coef(samp$fit))
    #title(paste(samp$name, ": m = a + b*x, (a,b)=(",
    #            signif((ab[i,1]),2), ",",
    #            signif((ab[i,2]),2), ")", sep=""))

    ## continue sampling?
    if(readline("press RETURN to continue, q to stop: ") == "q") break
  }

  ## plot the mean regression line, 95% CI on the mean, and 95% predictive interval
  m <- apply(l, 1, mean)
  plot(x, m, type="l", lwd=2, ylab="y", ylim=range(as.vector(ys)))
  title(paste(samp$name, ": predictive mean, CI, and interval", sep=""))
  q1 <- apply(l, 1, quantile, 0.025)
  lines(x, q1, lwd=2, col=2)
  q2 <- apply(l, 1, quantile, 0.975)
  lines(x, q2, lwd=2, col=2)
  qp1 <- apply(ys, 1, quantile, 0.025)
  qp2 <- apply(ys, 1, quantile, 0.975)
  lines(x, qp1, lwd=2, col=2, lty=2)
  lines(x, qp2, lwd=2, col=2, lty=2)
}
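## A non-interactive sketch (not part of the original file): the same
## credible-versus-predictive comparison without the readline() loop,
## using a fixed number of Monte Carlo replicates. The names n.reps,
## fits, and obs are illustrative choices.
##   n.reps <- 200
##   xg <- seq(-3, 2, length=30)
##   fits <- obs <- NULL
##   set.seed(123)
##   for(r in 1:n.reps) {
##     s <- quadf(xg, 10, 0.1)
##     fits <- cbind(fits, predict(s$fit))
##     obs <- cbind(obs, s$y)
##   }
##   m <- apply(fits, 1, mean)
##   plot(xg, m, type="l", lwd=2, ylab="y", ylim=range(obs))
##   lines(xg, apply(fits, 1, quantile, 0.025), col=2)        # CI on the mean
##   lines(xg, apply(fits, 1, quantile, 0.975), col=2)
##   lines(xg, apply(obs, 1, quantile, 0.025), col=2, lty=2)  # predictive interval
##   lines(xg, apply(obs, 1, quantile, 0.975), col=2, lty=2)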