Last active
March 23, 2019 20:22
-
-
Save jessecambon/96c253825d907aa0ab5c43e1ba9ba56f to your computer and use it in GitHub Desktop.
Improved Categorical Variable Regression Labels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Setup | |
library(broom) | |
library(tidyverse) | |
# obtain character list of independent variables in a model object. | |
obtain_model_varlist <- function(model_obj) { | |
var_list_raw <- unlist(strsplit(as.character(formula(model_obj)[3]),split=' \\+ ')) | |
# Remove smooth terms (s()) | |
return(var_list_raw[!str_detect(var_list_raw,'^s\\(')]) | |
} | |
# Find frequency counts for all categorical variables in variable list | |
var_freq <- function(data,var) { | |
var <- rlang::sym(var) | |
print(var) | |
if (is.factor(data %>% pull(!!var)) | is.character(data %>% pull(!!var))) { | |
return(data %>% count(!!var) %>% mutate(term=quo_name(var)) %>% | |
rename(level=!!var) %>% | |
mutate(level=as.character(level), # convert to char | |
is_categorical=1) %>% | |
select(term,everything())) | |
} else { | |
return(tibble()) | |
} | |
} | |
# Iterate through an entire dataset and return a dataset with sample | |
# sizes for all levels of categorical variables | |
find_all_freqs <- function(data,var_list) { | |
all_freqs <- tibble() | |
for (var in var_list) { | |
all_freqs <- all_freqs %>% | |
bind_rows(var_freq(data,var)) | |
} | |
return(all_freqs) | |
} | |
# adds term_name field to a tidy dataframe which includes sample sizes | |
add_termnames <- function(data,term_freqs,var_list) { | |
# Regexs to match the varname (when it begins a string) | |
varregex <- paste(str_replace(var_list,'^','\\^'), collapse = "|") | |
return( | |
data %>% | |
mutate(term_name = str_extract(term,varregex), | |
level = case_when(!is.na(term_name) ~ str_replace(term,varregex,""))) %>% | |
# add in frequency counts and labels | |
left_join(term_freqs,by=c('term_name'='term','level')) %>% | |
mutate(label=case_when(is.na(n) ~ term, # if not categorical than use original label | |
is_categorical == 1 ~ str_c(term_name,': ', level,' (',scales::comma(n),')'), | |
TRUE ~ str_c(level,' (',scales::comma(n),')'))) | |
) | |
} | |
## Build Linear Model | |
Mymtcars <- mtcars %>% | |
mutate(Cylinders=factor(cyl), | |
Gears=factor(gear)) | |
car_model <- lm(mpg ~ Cylinders + disp + Gears,data=Mymtcars) | |
# obtain list of independent variables | |
car_varlist <- obtain_model_varlist(car_model) | |
# sample sizes for categorical variable levels | |
car_freqs <- find_all_freqs(Mymtcars,car_varlist) | |
tidy_car <- tidy(car_model,conf.int=T) %>% | |
add_termnames(car_freqs,car_varlist) | |
glance_car <- glance(car_model) | |
## Plot Coefficients | |
ggplot(data=tidy_car %>% filter(label != '(Intercept)'), | |
aes(x = reorder(term,-estimate), y = estimate)) + | |
geom_point() + | |
scale_y_continuous() + | |
geom_hline(yintercept=0,color='grey') + | |
coord_flip() + | |
theme_bw() + | |
theme(plot.title = element_text(lineheight=1, face="bold",hjust = 0.5)) + | |
geom_pointrange(mapping=aes(ymin=conf.low, ymax=conf.high)) + | |
labs(title='MPG Linear Model - Default Labels', | |
caption='Sample sizes shown in (). Horizontal lines represents 95% confidence intervals.') + | |
xlab('Term') + ylab('Coefficient') | |
ggplot(data=tidy_car %>% filter(label != '(Intercept)'), | |
aes(x = reorder(label,-estimate), y = estimate)) + | |
geom_point() + | |
scale_y_continuous() + | |
geom_hline(yintercept=0,color='grey') + | |
coord_flip() + | |
theme_bw() + | |
theme(plot.title = element_text(lineheight=1, face="bold",hjust = 0.5)) + | |
geom_pointrange(mapping=aes(ymin=conf.low, ymax=conf.high)) + | |
labs(title='MPG Linear Model - With Improved Labels', | |
caption='Sample sizes shown in (). Horizontal lines represents 95% confidence intervals.') + | |
xlab('Term') + ylab('Coefficient') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment