Last active
March 27, 2019 16:35
-
-
Save hanowell/4ff45c90fdcea9f0faafcbd3a04a0055 to your computer and use it in GitHub Desktop.
how_to_measure_attrition_part_2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages ---- | |
library(tidyverse) | |
# Create US-state-level headcount dataset ---- | |
business_markets <- tibble::tribble( | |
~city_name, ~state_name, ~mean_headcount, | |
"Austin", "TX", 8, | |
"Denver", "CO", 22, | |
"Los Angeles", "CA", 97, | |
"Minneapolis", "MN", 7, | |
"New York", "NY", 14, | |
"Portland", "OR", 80, | |
"San Francisco", "CA", 145, | |
"Seattle", "WA", 198, | |
"St. Paul", "MN", 2 | |
) | |
# Parameterize the true distribution of city-level attrition rates ---- | |
attrition_mean <- 0.25 | |
attrition_variance <- 0.04 | |
alpha <- (attrition_mean^2)/attrition_variance | |
beta <- attrition_mean/attrition_variance | |
# Function to sample true city-level attrition rates ---- | |
sample_true_attrition <- function(data) { | |
data %>% | |
dplyr::mutate(true_attrition_rate = rgamma(length(city_name), alpha, beta)) | |
} | |
# Simulate terminations ---- | |
one_iteration <- function(data, iteration) { | |
data %>% | |
sample_true_attrition() %>% | |
dplyr::rowwise() %>% | |
dplyr::mutate( | |
terminations = rpois(1, true_attrition_rate * mean_headcount) | |
) %>% | |
dplyr::ungroup() %>% | |
dplyr::mutate(iteration = iteration) | |
} | |
multiple_iterations <- function(data, iterations) { | |
1:iterations %>% | |
purrr::map(., ~one_iteration(data, .)) %>% | |
dplyr::bind_rows() | |
} | |
simulations <- multiple_iterations(business_markets, 10000) %>% | |
dplyr::mutate( | |
observed_attrition_rate = terminations / mean_headcount | |
, absolute_error = abs(observed_attrition_rate - true_attrition_rate) | |
, absolute_p_error = absolute_error / true_attrition_rate | |
) | |
# Summarize Type M errors ---- | |
error_summary <- simulations %>% | |
dplyr::group_by(city_name, state_name, mean_headcount) %>% | |
dplyr::summarize(mape = mean(absolute_p_error) | |
, pr_zero = mean(terminations == 0) | |
, pr_all = mean(terminations == mean_headcount) | |
, pr_greater = mean(terminations > mean_headcount)) %>% | |
dplyr::ungroup() | |
# Summarize probabilities of extreme outcomes ---- | |
pr_extreme <- error_summary %>% | |
dplyr::select(city_name | |
, starts_with("pr_")) %>% | |
tidyr::gather(pr_type, pr, -city_name) %>% | |
dplyr::left_join(business_markets) %>% | |
dplyr::mutate(pr_type = case_when( | |
pr_type == "pr_zero" ~ "% none" | |
, pr_type == "pr_all" ~ "% all" | |
, pr_type == "pr_greater" ~ "% above headcount" | |
)) %>% | |
dplyr::mutate(pr_type = factor(pr_type, levels = c("% none" | |
, "% all" | |
, "% above headcount"))) | |
# Plot headcount by market ---- | |
ggplot2::ggplot(data = business_markets) + | |
ggplot2::aes(x = reorder(city_name, mean_headcount) | |
, y = mean_headcount) + | |
ggplot2::ylim(0, 220) + | |
ggplot2::geom_bar(stat = "identity", width = 0.01) + | |
ggplot2::geom_text(aes(label = mean_headcount), hjust = -1) + | |
ggplot2::geom_point() + | |
ggplot2::coord_flip() + | |
ggplot2::theme_minimal() + | |
ggplot2::theme(panel.grid = element_blank()) + | |
ggplot2::labs(title = "Mean annual headcount by business market" | |
, subtitle = "(for a totally fake company)" | |
, x = NULL | |
, y = NULL) | |
ggsave("./part_2/headcount.png" | |
, device = png() | |
, units = "in" | |
, width = 5 | |
, height = 3 | |
, dpi = 1200) | |
dev.off() | |
# Plot mean absolute percent error ---- | |
ggplot2::ggplot(data = error_summary) + | |
ggplot2::aes(x = mean_headcount, y = mape) + | |
ggplot2::geom_line() + | |
ggplot2::scale_y_continuous(labels = scales::percent) + | |
ggplot2::labs( | |
title = "Mean absolute % error (MAPE) of observed attrition rates" | |
, subtitle = "Error decreases exponentially with headcount" | |
, x = "Mean annual headcount" | |
, y = "MAPE" | |
) + | |
ggplot2::theme_minimal() + | |
ggplot2::theme(panel.grid = element_blank() | |
, axis.title.y = element_text(angle = 0)) | |
ggsave("./part_2/mape.png" | |
, device = png() | |
, units = "in" | |
, width = 6 | |
, height = 3.5 | |
, dpi = 1200) | |
dev.off() | |
# Plot probabilities of extreme attrition observations ---- | |
ggplot2::ggplot(data = pr_extreme) + | |
ggplot2::aes(x = mean_headcount, y = pr) + | |
ggplot2::geom_line() + | |
ggplot2::scale_y_continuous(labels = scales::percent) + | |
ggplot2::facet_wrap(. ~ pr_type) + | |
ggplot2::labs( | |
title = "% of simulations with extreme attrition rates" | |
, subtitle = "Extreme observations less likely for higher headcount" | |
, x = "Mean annual headcount" | |
, y = "% of simulations" | |
) + | |
ggplot2::theme(axis.title.y = element_text(angle = 0)) | |
ggsave("./part_2/pr_extreme.png" | |
, device = png() | |
, units = "in" | |
, width = 6 | |
, height = 2 | |
, dpi = 1200) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment