Skip to content

Instantly share code, notes, and snippets.

@aschleg
Created August 4, 2016 19:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aschleg/7299355048c50907db27b672f98c05c6 to your computer and use it in GitHub Desktop.
Save aschleg/7299355048c50907db27b672f98c05c6 to your computer and use it in GitHub Desktop.
Example function for fitting a multiple regression model with two predictor variables
# Simple function for fitting a regression line with two predictor variables.
# Function takes two arguments:
# x must be a matrix or dataframe with only two columns (variables)
# y must be a vector containing the response values of the same length
# Output is similar to the lm() function
# Function used as a demonstration in post on multiple regression here: http://wp.me/p4aZEo-5Lh
two.predictor.regression <- function(x, y) {
y <- as.matrix(y)
if (ncol(y) > 1) {
stop('y must be a vector')
}
x <- data.frame(x)
if (ncol(x) != 2) {
stop('x must have only two predictor variables')
}
x$intercept <- 1 # Add the intercept term to the X matrix
col <- grep('intercept', names(x))
x <- as.matrix(x[, c(col, (1:ncol(x))[-col])]) # Put the intercept term first in the X matrix
n <- length(y)
if (n != length(x[,1])) {
stop('x and y must be the same length')
}
# Degrees of freedom. As only two predictor variables and one response variable are considered, the degrees of freedom will always be 3
df <- 3
x.crossprod <- crossprod(x) # Calculate crossproduct X'X
y.crossprod <- crossprod(y) # Calculate crossproduct Y'Y
xy.crossprod <- crossprod(x, y) # crossproduct X'Y
x.inv <- solve(x.crossprod) # Find inverse of X'X
b <- x.inv %*% xy.crossprod # Get b parameters by multiplying the inverse of X'X and X'Y
fitted.y <- x %*% b # Find the fitted response values
resid.mat <- y - fitted.y # Find the residual values
min.resid <- min(resid.mat)
max.resid <- max(resid.mat)
med.resid <- median(resid.mat)
Q1.resid <- quantile(resid.mat, .25)
Q3.resid <- quantile(resid.mat, .75)
resid.df <- data.frame(round(cbind(min.resid, Q1.resid, med.resid, Q3.resid, max.resid), 3), row.names = '')
colnames(resid.df) <- c('Min', '1Q', 'Median', '3Q', 'Max')
sse <- y.crossprod - crossprod(b, xy.crossprod) # SSE
mse <- sse / (n - df) # MSE
sst <- y.crossprod - sum(y)^2 / n # SST
ssr <- sst - sse # SSR
msr <- ssr / 2 # MSR
f.stat <- msr / mse # Critical F-statistic
p.val <- pf(f.stat, df1 = 2, df2 = n - df, lower.tail = FALSE) # Computed p-value
R2 <- 1 - sse / sst # R-squared
R2.adj <- 1 - ((n - 1) / (n - df)) * sse / sst # adjusted R-squared
s2b <- mse[1] * x.inv # Estimate variance-covariance matrix of regression parameters
# Get the parameter standard errors
std.err <- suppressWarnings(matrix(sqrt(s2b)[!is.na(sqrt(s2b))], nrow = 3, ncol=1))
t.values <- b / std.err # Critical t-values of regression parameters
p.values <- 2 * pt(t.values, df = n - df, lower.tail = FALSE)
res.std.err <- sqrt(mse)
# The following collects and returns all the results found above into a list
coef <- data.frame(cbind(round(b,3), round(std.err,3), round(t.values,3), format(p.values, digits = 3)))
colnames(coef) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
resids <- paste('Residual standard error:', round(res.std.err, 3), 'on', length(delivery$n.prod - df), ' degrees of freedom')
R.squared <- paste('Multiple R-squared: ', round(R2,4), 'Adjusted R-squared: ', round(R2.adj,4))
f.stat <- paste('F-statistic: ', round(f.stat, 1), 'on 2 and', length(delivery$n.prod - df), ' DF')
p <- paste('p-value: ', format(p.val, digits = 4, scientific = TRUE))
res <- list('Residuals'=resid.df, 'Coefficients'=coef, resids, R.squared, f.stat, p)
return(res)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment