Skip to content

Instantly share code, notes, and snippets.

@skgrange
Last active November 19, 2021 09:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skgrange/1d5b2a51f478317bd0ccd9491eeb17c1 to your computer and use it in GitHub Desktop.
Save skgrange/1d5b2a51f478317bd0ccd9491eeb17c1 to your computer and use it in GitHub Desktop.
Example of training multiple linear regression (MLR) models to predict oxidative potential (OP) with other particulate matter (PM) constituents with simulated observations
# Define the helper functions ---------------------------
# https://stats.stackexchange.com/questions/83826/is-a-weighted-r2-in-robust-linear-model-meaningful-for-goodness-of-fit-analys/87085
r2w <- function(model, observed){
SSe <- sum(model$w * (model$resid) ^ 2)
observed <- model$resid+model$fitted
SSt <- sum((model$w * observed - mean(model$w * observed)) ^ 2)
value <- 1 - SSe / SSt;
return(value)
}
calculate_vif <- function(model) {
if (length(model$coefficients) >= 2) {
suppressWarnings(
x <- model %>%
car::vif() %>%
tibble::enframe()
)
} else {
x <- tibble()
}
return(x)
}
# Package set-up and checking ---------------------------
# Check if the packages are installed, if not, install them
if (!"threadr" %in% rownames(installed.packages())) {
# Install helper package
install.packages("devtools")
# Install threadr from GitHub
remotes::install_github("skgrange/threadr")
}
# Install MASS if needed
if (!"MASS" %in% rownames(installed.packages())) {
install.packages("MASS")
}
# Install car if needed
if (!"car" %in% rownames(installed.packages())) {
install.packages("car")
}
# Install modelr if needed
if (!"modelr" %in% rownames(installed.packages())) {
install.packages("modelr")
}
# Install broom if needed
if (!"broom" %in% rownames(installed.packages())) {
install.packages("broom")
}
# Load the helper package
library(threadr)
# Load an example data set which has been simulated ---------------------------
# Load data and group data for iteration, this is a nested tibble
# Within the nested set, `value` is the oxidative potential measurement
data_op_nest_with_formulas_simulated <- devtools::source_gist(
"https://gist.github.com/skgrange/57eb5c6116b13ca6902658bb83e085f2"
)$value %>%
rowwise(field_campaign,
where(is.character))
# Print the observations
data_op_nest_with_formulas_simulated
# Keep modelling reproducible
set.seed(123)
# Do the modelling ---------------------------
data_op_nest_models <- data_op_nest_with_formulas_simulated %>%
mutate(
model = list(
MASS::rlm(
as.formula(formula),
data = observations,
method = "MM",
maxit = 200
)
),
model_glance = list(broom::glance(model)),
model_tidy = list(broom::tidy(model)),
n_variables = nrow(model_tidy),
r_squared = r2w(model),
deviance = deviance(model),
vif = list(calculate_vif(model)),
vif_max = max(vif$value),
vif_max = if_else(is.infinite(vif_max), NA_real_, vif_max),
negative_estimate = any(model_tidy$estimate < 0),
observations = list(modelr::add_predictions(observations, model, "value_predict"))
)
# Print return
data_op_nest_models
# Summarise the models' terms
data_op_nest_models %>%
summarise(model_tidy,
.groups = "drop")
# Continue with the analysis....
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment