Skip to content

Instantly share code, notes, and snippets.

@seabbs
Created September 11, 2018 09:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seabbs/09029b1421523691b26205dfd48f93e1 to your computer and use it in GitHub Desktop.
Save seabbs/09029b1421523691b26205dfd48f93e1 to your computer and use it in GitHub Desktop.
Playing with an example GAM model using TB data
if (!require("pacman")) install.packages("pacman")
p_load("getTBinR")
p_load("tidyverse")
p_load("mgcv")
p_load("zoo")
tb <- get_tb_burden()
tb_features <- tb %>%
filter(e_inc_100k >= 200, e_inc_num > 100) %>%
select(country, g_whoregion, year, e_pop_num, e_inc_num, e_inc_100k) %>%
group_by(country) %>%
mutate(e_pop_num = na.locf(e_pop_num),
e_inc_num = na.locf(e_inc_num),
e_inc_100k = na.locf(e_inc_100k)) %>%
ungroup %>%
mutate(country = factor(country),
g_whoregion = factor(g_whoregion))
tb_features
train <- tb_features %>%
filter(year < 2016)
test <- tb_features %>%
anti_join(train)
## Baseline model
baseline <- gam(e_inc_100k ~ s(year), data = train, family = gaussian)
summary(baseline)
plot(baseline)
gam.check(baseline)
## Include region
region <- gam(e_inc_100k ~ s(year) + g_whoregion,
data = train,
family = gaussian)
summary(region)
plot(region)
AIC(baseline, region)
## Include yearly trend changes by region
region_trend <- gam(e_inc_num ~ s(year, by = g_whoregion) + g_whoregion,
data = train,
family = gaussian)
summary(region_trend)
AIC(region, region_trend)
## Include country
country <- gam(e_inc_100k ~ s(year) + g_whoregion + country,
data = train,
family = gaussian)
summary(country)
plot(country)
AIC(region, country)
## Include changing trends by region when country is adjusted for
country_region_trend <- gam(e_inc_100k ~ s(year, by = g_whoregion) + g_whoregion + country,
data = train,
family = gaussian)
summary(country_region_trend)
plot(country_region_trend)
AIC(country, country_region_trend)
## Include changing trend by country and not region
country_trend <- gam(e_inc_100k ~ s(year, by = country) + g_whoregion + country,
data = train,
family = gaussian)
summary(country_trend)
plot(country_trend)
AIC(country, country_region_trend, country_trend)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment