Skip to content

Instantly share code, notes, and snippets.

@hanowell
Last active March 27, 2019 16:35
Show Gist options
  • Save hanowell/4ff45c90fdcea9f0faafcbd3a04a0055 to your computer and use it in GitHub Desktop.
Save hanowell/4ff45c90fdcea9f0faafcbd3a04a0055 to your computer and use it in GitHub Desktop.
how_to_measure_attrition_part_2
# Load packages ----
library(tidyverse)
# Create US-state-level headcount dataset ----
business_markets <- tibble::tribble(
~city_name, ~state_name, ~mean_headcount,
"Austin", "TX", 8,
"Denver", "CO", 22,
"Los Angeles", "CA", 97,
"Minneapolis", "MN", 7,
"New York", "NY", 14,
"Portland", "OR", 80,
"San Francisco", "CA", 145,
"Seattle", "WA", 198,
"St. Paul", "MN", 2
)
# Parameterize the true distribution of city-level attrition rates ----
attrition_mean <- 0.25
attrition_variance <- 0.04
alpha <- (attrition_mean^2)/attrition_variance
beta <- attrition_mean/attrition_variance
# Function to sample true city-level attrition rates ----
sample_true_attrition <- function(data) {
data %>%
dplyr::mutate(true_attrition_rate = rgamma(length(city_name), alpha, beta))
}
# Simulate terminations ----
one_iteration <- function(data, iteration) {
data %>%
sample_true_attrition() %>%
dplyr::rowwise() %>%
dplyr::mutate(
terminations = rpois(1, true_attrition_rate * mean_headcount)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(iteration = iteration)
}
multiple_iterations <- function(data, iterations) {
1:iterations %>%
purrr::map(., ~one_iteration(data, .)) %>%
dplyr::bind_rows()
}
simulations <- multiple_iterations(business_markets, 10000) %>%
dplyr::mutate(
observed_attrition_rate = terminations / mean_headcount
, absolute_error = abs(observed_attrition_rate - true_attrition_rate)
, absolute_p_error = absolute_error / true_attrition_rate
)
# Summarize Type M errors ----
error_summary <- simulations %>%
dplyr::group_by(city_name, state_name, mean_headcount) %>%
dplyr::summarize(mape = mean(absolute_p_error)
, pr_zero = mean(terminations == 0)
, pr_all = mean(terminations == mean_headcount)
, pr_greater = mean(terminations > mean_headcount)) %>%
dplyr::ungroup()
# Summarize probabilities of extreme outcomes ----
pr_extreme <- error_summary %>%
dplyr::select(city_name
, starts_with("pr_")) %>%
tidyr::gather(pr_type, pr, -city_name) %>%
dplyr::left_join(business_markets) %>%
dplyr::mutate(pr_type = case_when(
pr_type == "pr_zero" ~ "% none"
, pr_type == "pr_all" ~ "% all"
, pr_type == "pr_greater" ~ "% above headcount"
)) %>%
dplyr::mutate(pr_type = factor(pr_type, levels = c("% none"
, "% all"
, "% above headcount")))
# Plot headcount by market ----
ggplot2::ggplot(data = business_markets) +
ggplot2::aes(x = reorder(city_name, mean_headcount)
, y = mean_headcount) +
ggplot2::ylim(0, 220) +
ggplot2::geom_bar(stat = "identity", width = 0.01) +
ggplot2::geom_text(aes(label = mean_headcount), hjust = -1) +
ggplot2::geom_point() +
ggplot2::coord_flip() +
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid = element_blank()) +
ggplot2::labs(title = "Mean annual headcount by business market"
, subtitle = "(for a totally fake company)"
, x = NULL
, y = NULL)
ggsave("./part_2/headcount.png"
, device = png()
, units = "in"
, width = 5
, height = 3
, dpi = 1200)
dev.off()
# Plot mean absolute percent error ----
ggplot2::ggplot(data = error_summary) +
ggplot2::aes(x = mean_headcount, y = mape) +
ggplot2::geom_line() +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::labs(
title = "Mean absolute % error (MAPE) of observed attrition rates"
, subtitle = "Error decreases exponentially with headcount"
, x = "Mean annual headcount"
, y = "MAPE"
) +
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid = element_blank()
, axis.title.y = element_text(angle = 0))
ggsave("./part_2/mape.png"
, device = png()
, units = "in"
, width = 6
, height = 3.5
, dpi = 1200)
dev.off()
# Plot probabilities of extreme attrition observations ----
ggplot2::ggplot(data = pr_extreme) +
ggplot2::aes(x = mean_headcount, y = pr) +
ggplot2::geom_line() +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::facet_wrap(. ~ pr_type) +
ggplot2::labs(
title = "% of simulations with extreme attrition rates"
, subtitle = "Extreme observations less likely for higher headcount"
, x = "Mean annual headcount"
, y = "% of simulations"
) +
ggplot2::theme(axis.title.y = element_text(angle = 0))
ggsave("./part_2/pr_extreme.png"
, device = png()
, units = "in"
, width = 6
, height = 2
, dpi = 1200)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment