Last active
November 3, 2020 00:17
-
-
Save kgilbert-cmu/86a17a26fbc777d7bd52 to your computer and use it in GitHub Desktop.
Formulas in R are hard to manipulate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kevin Gilbert
# CMU CS '14
# Formulas in R are hard to manipulate and cannot have string functions
# applied to them directly. These helpers fix that.
# ---------- ---------- ---------- ---------- ----------
# Given two formulas, merge their predictors.
#   X ~ m + n  and  X ~ y + z   =>   X ~ m + n + y + z
#
# A, B: formulas (or any object terms() accepts). The response, if there is
#       one, is taken from A; the left-hand side of B is ignored.
# Returns a formula whose right-hand side lists the term labels of A followed
# by those of B, each label appearing once. The intercept is removed when
# either input removes it, and the result carries the environment of A so
# variables visible to A are still found when the formula is used in a fit.
# Note: offset() terms are not part of "term.labels" and are not carried over.
formula.merge <- function(A, B) {
  tmpA <- terms(A)
  tmpB <- terms(B)
  # The "response" attribute is 0 for a one-sided formula. Reading the
  # left-hand side directly (instead of the first row name of "factors")
  # also works when A has no predictors at all, e.g. X ~ 1.
  resp <- if (attr(tmpA, "response") > 0) tmpA[[2]] else NULL
  pred <- unique(c(attr(tmpA, "term.labels"), attr(tmpB, "term.labels")))
  keep_intercept <- attr(tmpA, "intercept") == 1 && attr(tmpB, "intercept") == 1
  if (length(pred) == 0) {
    # reformulate() requires at least one term label, so spell the
    # intercept-only / empty model out explicitly.
    pred <- if (keep_intercept) "1" else "0"
    keep_intercept <- TRUE
  }
  merged <- reformulate(pred, response = resp, intercept = keep_intercept)
  # Without this the formula would be bound to this function's own frame.
  environment(merged) <- environment(tmpA)
  merged
}
# Wrap every predictor of a model formula in a function call; by default
# each predictor X becomes s(X), as typically used with mgcv.
#   X ~ y + z   =>   X ~ s(y) + s(z)
#
# model: a formula or a fitted model (anything terms() accepts).
# f:     function taking one term label (a string) and returning the single
#        string that should replace it.
# Returns a formula with the same response (if any), intercept setting and
# environment as `model`, whose terms are the outputs of f. A model with no
# predictors is returned as an intercept-only (or empty) formula.
formula.wrap <- function(model, f = function(x) paste(c("s(",x,")"), collapse = "")) {
  tmp <- terms(model)
  # The "response" attribute is 0 for a one-sided formula; reading the
  # left-hand side directly also works when there are no predictors.
  resp <- if (attr(tmp, "response") > 0) tmp[[2]] else NULL
  pred <- attr(tmp, "term.labels")
  # vapply keeps the result a character vector even when pred is empty.
  wrapped <- vapply(pred, f, character(1), USE.NAMES = FALSE)
  keep_intercept <- attr(tmp, "intercept") == 1
  if (length(wrapped) == 0) {
    # reformulate() requires at least one term label.
    wrapped <- if (keep_intercept) "1" else "0"
    keep_intercept <- TRUE
  }
  out <- reformulate(wrapped, response = resp, intercept = keep_intercept)
  # Without this the formula would be bound to this function's own frame.
  environment(out) <- environment(tmp)
  out
}
# ---------- ---------- ---------- ---------- ----------
# Example (see the 36-402 lecture on Additive Models):
# ...you can now effectively write MedianHouseValue ~ s(.)!
# NOTE(review): assumes `calif` is already loaded and that gam() is
# available (presumably from mgcv) -- neither is set up in this file.
look_at_that_formula <- formula.wrap(
  lm(log(MedianHouseValue) ~ ., data = calif)
)
# log(MedianHouseValue) ~ s(MedianIncome) + s(MedianHouseAge) +
#     s(TotalRooms) + s(TotalBedrooms) + s(Population) + s(Households) +
#     s(Latitude) + s(Longitude)
addfit <- gam(look_at_that_formula, data = calif)
# Replace the two separate coordinate smooths with a single joint smooth.
addfit2 <- update(
  addfit,
  ~ . - s(Latitude) - s(Longitude) + s(Latitude, Longitude)
)
# log(MedianHouseValue) ~ s(MedianIncome) + s(MedianHouseAge) +
#     s(TotalRooms) + s(TotalBedrooms) + s(Population) + s(Households) +
#     s(Latitude, Longitude)
# ---------- ---------- ---------- ---------- ----------
# Example: formula.merge lets main effects and interactions be kept as
# separate formulas and combined only when needed.
library(datasets)
interactions <- mpg ~ hp:cyl + disp:wt
# Fitting with `.` and reading the formula back expands it to named columns.
main_effects <- formula(lm(mpg ~ ., data = mtcars))
formula.merge(main_effects, interactions)
# mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb +
#     hp:cyl + disp:wt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment