Skip to content

Instantly share code, notes, and snippets.

@aschleg
Last active July 6, 2016 21:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aschleg/618d6911ce64e82bf140af1080edbf13 to your computer and use it in GitHub Desktop.
Save aschleg/618d6911ce64e82bf140af1080edbf13 to your computer and use it in GitHub Desktop.
Quick function for performing linear regression with one predictor variable
# Quick function for performing linear regression with only one predictor variable.
# Takes two arguments, x and y. Variables must have same length.
# Output will be similar to output of lm() with one predictor.
# Gist created as part of post on linear regression: http://www.aaronschlegel.com/notebook/simple-linear-regression-models-r/
simple.linear.coef <- function(x, y) {
# Find length of x to get sample size. Assuming x and y have the same sample size.
n <- length(x)
if (n != length(y))
stop('variables are not the same size')
# Calculate the error statistics Sxx, Syy, and Sxy
sxx <- sum(x^2) - sum(x)^2 / n
syy <- sum(y^2) - sum(y)^2 / n
sxy <- sum(x * y) - (sum(x) * sum(y)) / n
# Coefficients beta0 and beta1
b1 <- sxy / sxx
b0 <- mean(y) - b1 * mean(x)
# Sum of standard error and Mean Standard Error
sse <- syy - sxy^2 / sxx
mse <- sse / (n - 2)
# Standard error beta0 and beta1
b1.err <- sqrt(mse) / sqrt(sxx)
b0.err <- sqrt(mse) / sqrt(n) * sqrt(1 + (mean(x)^2 / (sum((x - mean(x))^2) / n)))
# beta0 and beta1 t-values
b0.t <- (b0 - 0) / b0.err
b1.t <- (b1 - 0) / b1.err
# p-values of beta0 and beta1
b0.p <- 2 * pt(b0.t, df = n - 2)
b1.p <- 2 * pt(b1.t, df = n - 2, lower.tail = FALSE)
# Coefficient of determination R-squared
r2 <- (syy - sse) /syy
# R-squared adjusted
r2.adj <- r2 - (1 - r2) * ((2 - 1) / (length(y) - 2))
rsquare <- paste('Multiple R-squared: ', round(r2, 4), ', Adjusted R-squared: ', round(r2.adj, 4))
coeffs <- data.frame(cbind(c(b0, b1), c(b0.err, b1.err), c(b0.t, b1.t), c(b0.p, b1.p)))
colnames(coeffs) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
rownames(coeffs) <- c('Intercept', 'x1')
# Fit the line to the data with beta0 and beta1 found above
fitted <- x * b1 + b0
# The F-Statistic
msr <- sum((fitted - mean(y))^2) / 1
mse2 <- sum((y - fitted)^2) / (length(y) - 2)
f <- msr / mse2
# p-value
p <- pf(f, 1, length(y) - 2, lower.tail = FALSE)
f.stat <- paste('F-statistic: ', round(f, 2), ' on 1 and ', n - 2, ' DF, p-value: ', format(p, digits = 3, scientific = TRUE))
# Calculate and find summary statistics of the residuals
resd <- y - fitted
min.res <- round(min(resd), 3)
max.res <- round(max(resd), 3)
q1.q3 <- quantile(resd, probs = c(.25, .75)) # 1st and 3rd quartiles of the residuals
med <- round(median(resd), 3)
residual <- data.frame(cbind(min.res, round(q1.q3[1], 3), med, round(q1.q3[2], 3), max.res))
colnames(residual) <- c('Min', 'Q1', 'Median', 'Q3', 'Max')
resdi <- paste('Residual standard error: ', round(sqrt(mse2), 2), ' on ', n - 2, ' degrees of freedom')
regres <- list('Residuals'=residual, 'Coefficients'=coeffs, resdi, rsquare, f.stat)
return(regres)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment