Last active
November 19, 2021 09:24
-
-
Save skgrange/1d5b2a51f478317bd0ccd9491eeb17c1 to your computer and use it in GitHub Desktop.
Example of training multiple linear regression (MLR) models to predict oxidative potential (OP) with other particulate matter (PM) constituents with simulated observations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define the helper functions --------------------------- | |
# https://stats.stackexchange.com/questions/83826/is-a-weighted-r2-in-robust-linear-model-meaningful-for-goodness-of-fit-analys/87085 | |
r2w <- function(model, observed){ | |
SSe <- sum(model$w * (model$resid) ^ 2) | |
observed <- model$resid+model$fitted | |
SSt <- sum((model$w * observed - mean(model$w * observed)) ^ 2) | |
value <- 1 - SSe / SSt; | |
return(value) | |
} | |
calculate_vif <- function(model) { | |
if (length(model$coefficients) >= 2) { | |
suppressWarnings( | |
x <- model %>% | |
car::vif() %>% | |
tibble::enframe() | |
) | |
} else { | |
x <- tibble() | |
} | |
return(x) | |
} | |
# Package set-up and checking --------------------------- | |
# Check if the packages are installed, if not, install them | |
if (!"threadr" %in% rownames(installed.packages())) { | |
# Install helper package | |
install.packages("devtools") | |
# Install threadr from GitHub | |
remotes::install_github("skgrange/threadr") | |
} | |
# Install MASS if needed | |
if (!"MASS" %in% rownames(installed.packages())) { | |
install.packages("MASS") | |
} | |
# Install car if needed | |
if (!"car" %in% rownames(installed.packages())) { | |
install.packages("car") | |
} | |
# Install modelr if needed | |
if (!"modelr" %in% rownames(installed.packages())) { | |
install.packages("modelr") | |
} | |
# Install broom if needed | |
if (!"broom" %in% rownames(installed.packages())) { | |
install.packages("broom") | |
} | |
# Load the helper package | |
library(threadr) | |
# Load an example data set which has been simulated --------------------------- | |
# Load data and group data for iteration, this is a nested tibble | |
# Within the nested set, `value` is the oxidative potential measurement | |
data_op_nest_with_formulas_simulated <- devtools::source_gist( | |
"https://gist.github.com/skgrange/57eb5c6116b13ca6902658bb83e085f2" | |
)$value %>% | |
rowwise(field_campaign, | |
where(is.character)) | |
# Print the observations | |
data_op_nest_with_formulas_simulated | |
# Keep modelling reproducible | |
set.seed(123) | |
# Do the modelling --------------------------- | |
data_op_nest_models <- data_op_nest_with_formulas_simulated %>% | |
mutate( | |
model = list( | |
MASS::rlm( | |
as.formula(formula), | |
data = observations, | |
method = "MM", | |
maxit = 200 | |
) | |
), | |
model_glance = list(broom::glance(model)), | |
model_tidy = list(broom::tidy(model)), | |
n_variables = nrow(model_tidy), | |
r_squared = r2w(model), | |
deviance = deviance(model), | |
vif = list(calculate_vif(model)), | |
vif_max = max(vif$value), | |
vif_max = if_else(is.infinite(vif_max), NA_real_, vif_max), | |
negative_estimate = any(model_tidy$estimate < 0), | |
observations = list(modelr::add_predictions(observations, model, "value_predict")) | |
) | |
# Print return | |
data_op_nest_models | |
# Summarise the models' terms | |
data_op_nest_models %>% | |
summarise(model_tidy, | |
.groups = "drop") | |
# Continue with the analysis.... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment