Skip to content

Instantly share code, notes, and snippets.

@TimTeaFan
Last active October 1, 2022 19:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TimTeaFan/c9f74f22ea1559ab9ad85edc26a5c809 to your computer and use it in GitHub Desktop.
Save TimTeaFan/c9f74f22ea1559ab9ad85edc26a5c809 to your computer and use it in GitHub Desktop.
many model approach with multi-level data and models
library(tidyverse)
library(gapminder)
# lists of independent variables
indep_var_country <- list(
"gdp_only" = "gdpPercap",
"gdp_and_pop" = c("pop", "gdpPercap")
)
indep_var_continent <- append(
indep_var_country,
list("gpd_pop_country" = c("pop", "gdpPercap", "country"))
)
indep_var_world <- append(
indep_var_continent,
list("gpd_pop_cont" = c("pop", "gdpPercap", "continent"))
)
# create base dataset with two extra grouping columns starting with `grp_`
gap_base <- gapminder %>%
mutate(grp_continent = continent,
grp_country = country,
.before = 1L)
# dataset for country levels expanded with indep vars on country level
gap_country <- gap_base %>%
expand_grid(indep_vars = indep_var_country)
# dataset for continent levels expanded with indep vars on continent level
gap_continent <- gap_base %>%
mutate(grp_country = grp_continent) %>%
expand_grid(indep_vars = indep_var_continent)
# dataset for world expanded with indep vars on world level
gap_world <- gap_base %>%
mutate(grp_country = "World",
grp_continent = "World") %>%
expand_grid(indep_vars = indep_var_world)
# lets bind the datasets together and nest and arrange them
gap_nested <- bind_rows(
"country" = gap_country,
"continent" = gap_continent,
"world" = gap_world,
.id = "level"
) %>%
# lets add the names of the model groups as column
mutate(mod_group = names(indep_vars)) %>%
# lets nest_by and proceed rowwise
nest_by(level, mod_group, grp_continent, grp_country, indep_vars) %>%
# show "world" and "continent" level models first
# and show model groups in order from gdp_only to gpd_pop_country_cont
arrange(level != "world",
level == "country",
nchar(mod_group))
gap_res <- gap_nested %>%
mutate(
# create models using the list of characters in indep_vars
mod = list(lm(reformulate(indep_vars, "lifeExp"),
data = data)),
# get model estimates
res = list(broom::tidy(mod)),
# get model statistics
stats = list(broom::glance(mod))
) %>%
# ungroup and only select relevant columns
ungroup() %>%
select(level, mod_group, grp_continent, grp_country, res, stats)
# Create a tibble with model statistics
gap_stats <- gap_res %>%
unnest(stats) %>%
select(-res)
# Create a tibble with model estimates and p-values
gap_estimates <- gap_res %>%
unnest(res) %>%
filter(term != "(Intercept)") %>%
select(-stats)
# from here on we can filter and extract the model data and choose what we want to plot
# Example: which independent variables work best on which level on average:
gap_stats %>%
group_by(level, mod_group) %>%
summarise(adjr2 = mean(adj.r.squared)) %>%
arrange(desc(adjr2))
# A tibble: 9 × 3
# Groups: level [3]
# level mod_group adjr2
# <chr> <chr> <dbl>
# 1 country gdp_and_pop 0.859
# 2 world gpd_pop_country 0.770
# 3 continent gpd_pop_country 0.691
# 4 country gdp_only 0.598
# 5 world gpd_pop_cont 0.581
# 6 continent gdp_and_pop 0.441
# 7 continent gdp_only 0.431
# 8 world gdp_and_pop 0.346
# 9 world gdp_only 0.340
# ...
# to be continued
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment