Created
March 21, 2019 22:37
-
-
Save hanowell/fd7046a4d06c232b417643bfce636874 to your computer and use it in GitHub Desktop.
how_to_measure_attrition_part_1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages ---- | |
library(tidyverse) | |
library(RcppRoll) | |
library(scales) | |
# Create three different headcount growth models ---- | |
dates <- seq.Date(as.Date("2006-01-01"), as.Date("2018-12-31"), by = "day") %>% | |
tibble::tibble(daily_date = ., t = 1:length(.)) %>% | |
dplyr::mutate( | |
month_begin_date = lubridate::floor_date(daily_date, unit = "month") | |
, calendar_year = lubridate::year(daily_date) | |
, quarter_of_year = lubridate::quarter(daily_date) | |
, month_of_year = lubridate::month(daily_date) | |
, day_of_month = lubridate::day(daily_date) | |
) %>% | |
dplyr::mutate(day_of_year = lubridate::yday(daily_date)) | |
starting_headcount <- 100 | |
capacity <- 1000 | |
linear_growth_rate <- capacity / max(dates$t) | |
linear_growth <- dates %>% | |
dplyr::mutate(growth_pattern = "Linear growth" | |
, headcount = starting_headcount + linear_growth_rate * t) | |
exponential_growth_rate <- 0.0005 | |
exponential_growth <- dates %>% | |
dplyr::mutate(growth_pattern = "Exponential growth" | |
, headcount = starting_headcount * exp(0.0005 * t)) | |
logistic_growth_rate <- 0.0015 | |
logistic_growth <- dates %>% | |
dplyr::mutate( | |
growth_pattern = "Logistic growth" | |
, headcount = ( | |
(capacity * starting_headcount * exp(logistic_growth_rate * t)) | |
/ (capacity + starting_headcount * (exp(logistic_growth_rate * t) - 1)) | |
) | |
) | |
headcount_models <- dplyr::bind_rows( | |
linear_growth | |
, exponential_growth | |
, logistic_growth | |
) %>% | |
dplyr::mutate(headcount = round(headcount)) %>% | |
dplyr::rename(period_number = t) | |
# Define total, regrettable, and unregrettable attrition rates ---- | |
total_attrition <- 0.2 # Annual rate | |
share_regrettable <- 0.5 # Share of attrition that is regrettable | |
# Simulate daily terminations ---- | |
headcount_models <- headcount_models %>% | |
dplyr::rowwise() %>% | |
dplyr::mutate( | |
regrettable = rpois(1, ((0.5 * total_attrition) / 365.25) * headcount) | |
, unregrettable = rpois(1, ((0.5 * total_attrition) / 365.25) * headcount) | |
) %>% | |
dplyr::ungroup() | |
# Function to find middle day of a month for a given input date ---- | |
middle_day <- function(input_date) { | |
round(0.5 * lubridate::days_in_month(input_date)) | |
} | |
# Create starting, mid-month, and ending headcount snapshots per month ---- | |
monthly_snapshots <- headcount_models %>% | |
dplyr::filter(day_of_month == 1 | |
| day_of_month == middle_day(daily_date) | |
| day_of_month == lubridate::days_in_month(daily_date)) %>% | |
dplyr::mutate(headcount_type = case_when( | |
day_of_month == 1 ~ "Start" | |
, day_of_month == middle_day(daily_date) ~ "Mid" | |
, day_of_month == lubridate::days_in_month(daily_date) ~ "End" | |
)) %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
dplyr::mutate(period_number = row_number()) %>% | |
dplyr::ungroup() %>% | |
dplyr::select(growth_pattern | |
, headcount_type | |
, period_number | |
, month_begin_date | |
, calendar_year | |
, month_of_year | |
, headcount) | |
# Create annual snapshots ---- | |
annual_snapshots <- headcount_models %>% | |
dplyr::filter(day_of_year == 1 | |
| (day_of_month == 2 & month_of_year == 7) | |
| (day_of_month == 31 & month_of_year == 12)) %>% | |
dplyr::mutate(headcount_type = case_when( | |
day_of_year == 1 ~ "Start" | |
, (day_of_month == 2 & month_of_year == 7) ~ "Mid" | |
, (day_of_month == 31 & month_of_year == 12) ~ "End" | |
)) %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
dplyr::mutate(period_number = row_number()) %>% | |
dplyr::ungroup() %>% | |
dplyr::select(growth_pattern | |
, headcount_type | |
, period_number | |
, calendar_year | |
, headcount) | |
# Append mid-period approximations ---- | |
append_approximate_mid <- function(data) { | |
data %>% | |
dplyr::filter(headcount_type %in% c("Start", "End")) %>% | |
dplyr::group_by(growth_pattern | |
, period_number) %>% | |
dplyr::summarize(headcount = mean(headcount)) %>% | |
dplyr::ungroup() %>% | |
dplyr::mutate(headcount_type = "(Start + End)/2") | |
} | |
monthly_snapshots <- monthly_snapshots %>% | |
append_approximate_mid() %>% | |
dplyr::left_join(monthly_snapshots %>% | |
dplyr::distinct(period_number | |
, month_begin_date | |
, calendar_year | |
, month_of_year)) %>% | |
dplyr::bind_rows(monthly_snapshots) | |
annual_snapshots <- annual_snapshots %>% | |
append_approximate_mid() %>% | |
dplyr::left_join(annual_snapshots %>% | |
dplyr::distinct(period_number, calendar_year)) %>% | |
dplyr::bind_rows(annual_snapshots) | |
# Create annual averages of monthly snapshots ---- | |
ann_avg_monthly_snapshots <- monthly_snapshots %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type | |
, calendar_year) %>% | |
dplyr::summarize(headcount = mean(headcount)) %>% | |
dplyr::ungroup() %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
dplyr::mutate(period_number = row_number()) %>% | |
dplyr::ungroup() | |
# Create trailing 13-month averages of month-end snapshots ---- | |
get_txavg <- function(data, x, m) { | |
data %>% | |
dplyr::filter(headcount_type == "End") %>% | |
dplyr::group_by(growth_pattern) %>% | |
dplyr::mutate(txavg = RcppRoll::roll_mean(headcount, x | |
, align = "right" | |
, fill = NA) | |
, period_number = row_number()) %>% | |
dplyr::ungroup() %>% | |
dplyr::filter(month_of_year %in% m, !is.na(txavg)) %>% | |
dplyr::select(growth_pattern | |
, calendar_year | |
, period_number | |
, txavg) %>% | |
dplyr::rename(headcount = txavg) | |
} | |
t13avg_month_end <- get_txavg(monthly_snapshots, 13, 12) | |
# Create trailing 4-month averages of month-end snapshots ---- | |
t4avg_month_end <- get_txavg(monthly_snapshots, 4, c(3, 6, 9, 12)) %>% | |
dplyr::mutate(quarter_of_year = rep(c(2:4, rep(1:4, 12)), 3)) | |
# Compute employee-months, -quarters, and -years, and terminations ---- | |
get_employee_periods <- function(data) { | |
data %>% | |
dplyr::summarize(employee_periods = mean(headcount) | |
, regrettable = sum(regrettable) | |
, unregrettable = sum(unregrettable)) %>% | |
dplyr::ungroup() | |
} | |
employee_months <- headcount_models %>% | |
dplyr::group_by(growth_pattern | |
, month_begin_date) %>% | |
get_employee_periods() | |
employee_quarters <- headcount_models %>% | |
dplyr::group_by(growth_pattern | |
, calendar_year | |
, quarter_of_year) %>% | |
get_employee_periods() | |
employee_years <- headcount_models %>% | |
dplyr::group_by(growth_pattern | |
, calendar_year) %>% | |
get_employee_periods() | |
monthly_snapshots <- dplyr::left_join(monthly_snapshots, employee_months) | |
ann_avg_monthly_snapshots <- ann_avg_monthly_snapshots %>% | |
dplyr::left_join(employee_years) | |
t13avg_month_end <- dplyr::left_join(t13avg_month_end, employee_years) | |
t4avg_month_end <- dplyr::left_join(t4avg_month_end, employee_quarters) | |
annual_snapshots <- dplyr::left_join(annual_snapshots, employee_years) | |
# Compute net new hires ---- | |
compute_net_new_hires <- function(data) { | |
data %>% | |
dplyr::mutate(headcount_change = lag(headcount) | |
, backfills = lag(regrettable) + lag(unregrettable) | |
, net_new_hires = headcount_change + backfills) %>% | |
dplyr::mutate_at(vars(headcount_change, backfills, net_new_hires) | |
, funs(coalesce(as.numeric(.), 0))) | |
} | |
headcount_models <- headcount_models %>% | |
dplyr::group_by(growth_pattern) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
monthly_snapshots <- monthly_snapshots %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
ann_avg_monthly_snapshots <- ann_avg_monthly_snapshots %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
t13avg_month_end <- t13avg_month_end %>% | |
dplyr::group_by(growth_pattern) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
t4avg_month_end <- t4avg_month_end %>% | |
dplyr::group_by(growth_pattern) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
annual_snapshots <- annual_snapshots %>% | |
dplyr::group_by(growth_pattern | |
, headcount_type) %>% | |
compute_net_new_hires() %>% | |
dplyr::ungroup() | |
# Compare headcount snapshots to employee periods ---- | |
headcount_share_of_budget <- 0.7 | |
compare_methods <- function(snapshots) { | |
snapshots %>% | |
dplyr::mutate( | |
headcount_signed_error = headcount - employee_periods | |
, headcount_signed_p_error = headcount_signed_error / employee_periods | |
, headcount_cost_signed_error_mm = (1e5 * headcount_signed_error) / 1e6 | |
, headcount_cost_actual_mm = (1e5 * employee_periods) / 1e6 | |
, budget_actual_mm = headcount_cost_actual_mm / headcount_share_of_budget | |
, headcount_budget_margin_error_mm = | |
headcount_cost_signed_error_mm / budget_actual_mm | |
) %>% | |
dplyr::mutate_at(vars(regrettable, unregrettable) | |
, funs(approximate_rate = . / headcount | |
, actual_rate = . / employee_periods)) %>% | |
dplyr::mutate( | |
regrettable_rate_signed_error = | |
regrettable_approximate_rate - regrettable_actual_rate | |
, regrettable_rate_signed_p_error = | |
regrettable_rate_signed_error / regrettable_actual_rate | |
, unregrettable_rate_signed_error = | |
unregrettable_approximate_rate - unregrettable_actual_rate | |
, unregrettable_rate_signed_p_error = | |
unregrettable_rate_signed_error / unregrettable_actual_rate | |
) %>% | |
dplyr::mutate_at(vars(contains("regrettable_rate_signed_error")) | |
, funs(. * 100)) | |
} | |
monthly_snapshots <- compare_methods(monthly_snapshots) | |
ann_avg_monthly_snapshots <- compare_methods(ann_avg_monthly_snapshots) | |
t13avg_month_end <- compare_methods(t13avg_month_end) | |
t4avg_month_end <- compare_methods(t4avg_month_end) | |
annual_snapshots <- compare_methods(annual_snapshots) | |
# Factorize headcount_type ---- | |
factorize_headcount_type <- function(data) { | |
data %>% | |
dplyr::mutate( | |
headcount_type = factor(headcount_type, levels = c("Start" | |
, "Mid" | |
, "(Start + End)/2" | |
, "End")) | |
) | |
} | |
monthly_snapshots <- factorize_headcount_type(monthly_snapshots) | |
ann_avg_monthly_snapshots <- ann_avg_monthly_snapshots %>% | |
factorize_headcount_type() | |
annual_snapshots <- factorize_headcount_type(annual_snapshots) | |
# Create a plot template ---- | |
plot_template <- function(gg, title, subtitle) { | |
gg + | |
ggplot2::geom_line() + | |
ggplot2::facet_wrap( | |
. ~ factor(growth_pattern, levels = c("Linear growth" | |
, "Exponential growth" | |
, "Logistic growth")) | |
) + | |
ggplot2::labs(title = title | |
, subtitle = subtitle | |
, x = NULL | |
, y = NULL | |
, color = "Headcount snapshot used") | |
} | |
save_a_plot <- function(filename, width = 6, height = 3.5) { | |
ggplot2::ggsave(filename | |
, device = png() | |
, dpi = 1200 | |
, units = "in" | |
, width = width | |
, height = height) | |
} | |
# Plot daily headcount for each modeling scenario ---- | |
gg <- ggplot2::ggplot(data = headcount_models) + | |
ggplot2::aes_string(x = "daily_date", y = "headcount") | |
plot_template(gg | |
, title = "Daily headcount for three growth pattern scenarios" | |
, subtitle = "Figures show number of employees per day") | |
save_a_plot("daily_headcount.png") | |
dev.off() | |
# Plot annual headcount snapshots and employee-years ---- | |
snapshot_plot_template <- function(data, title, subtitle, xstr, ystr) { | |
gg <- ggplot2::ggplot(data = data) + | |
ggplot2::aes_string(x = xstr, y = ystr, color = "headcount_type") | |
plot_template(gg, title = title, subtitle = subtitle) + | |
theme(axis.title.y = element_text(angle = 0), legend.position = "top") + | |
geom_line(aes(y = employee_periods), color = "black", linetype = "dashed") | |
} | |
snapshot_plot_template( | |
annual_snapshots | |
, title = "Annual headcount snapshots VS. exact average headcount" | |
, subtitle = "Dashed black line is exact average headcount" | |
, xstr = "calendar_year" | |
, ystr = "headcount" | |
) | |
save_a_plot("annual_headcount.png") | |
dev.off() | |
# Plot annual average monthly headcount snapshots and employee-years ---- | |
snapshot_plot_template( | |
ann_avg_monthly_snapshots | |
, title = paste0("Annual average monthly headcount snapshots\n" | |
, "VS. exact average headcount") | |
, subtitle = "Dashed black line is exact average headcount." | |
, xstr = "calendar_year" | |
, ystr = "headcount" | |
) | |
save_a_plot("annual_headcount_ann_avg_monthly.png") | |
dev.off() | |
# Plot headcount signed errors ---- | |
signed_error_plot_template <- function(data, title, subtitle, xstr, ystr) { | |
gg <- ggplot2::ggplot(data = data) + | |
ggplot2::aes_string(x = xstr, y = ystr, color = "headcount_type") | |
plot_template(gg, title, subtitle) + | |
ggplot2::theme(axis.title.y = element_text(angle = 0) | |
, legend.position = "top") + | |
ggplot2::geom_hline(aes(yintercept = 0) | |
, color = "black", linetype = "dashed") | |
} | |
signed_error_plot_template( | |
annual_snapshots | |
, "Signed error of four annual average headcount approximations" | |
, "Figures are number of employees" | |
, xstr = "calendar_year" | |
, ystr = "headcount_signed_error" | |
) | |
save_a_plot("annual_headcount_signed_error.png") | |
dev.off() | |
# Plot headcount signed errors for annual average monthly snapshots ---- | |
signed_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed error of four annual average headcount approximations" | |
, paste("Figures are number of employees." | |
, "Based on annual average monthly snapshots.") | |
, xstr = "calendar_year" | |
, ystr = "headcount_signed_error" | |
) | |
save_a_plot("annual_headcount_signed_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot headcount signed percent errors ---- | |
signed_p_error_plot_template <- function(data, title, subtitle, xstr, ystr) { | |
signed_error_plot_template(data, title, subtitle, xstr, ystr) + | |
ggplot2::scale_y_continuous(labels = scales::percent) | |
} | |
signed_p_error_plot_template( | |
annual_snapshots | |
, "Signed % error of four annual average headcount approximations" | |
, paste("Figures are % difference between headcount snapshot and" | |
, "exact average headcount") | |
, xstr = "calendar_year" | |
, ystr = "headcount_signed_p_error" | |
) | |
save_a_plot("annual_headcount_signed_p_error.png") | |
dev.off() | |
# Plot headcount signed percent errors for annual monthly averages ---- | |
signed_p_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed % error of four annual average headcount approximations" | |
, paste( | |
"Figures are % difference between headcount snapshot and exact" | |
, "average headcount\nBased on annual averages of monthly headcount." | |
) | |
, xstr = "calendar_year" | |
, ystr = "headcount_signed_p_error" | |
) | |
save_a_plot("annual_headcount_signed_p_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot regrettable attriton rate signed errors ---- | |
signed_error_plot_template( | |
annual_snapshots | |
, "Signed error of four annual regrettable attrition approximations" | |
, "Figures are differences in attrition rate per 100 full employee years" | |
, xstr = "calendar_year" | |
, ystr = "regrettable_rate_signed_error" | |
) | |
save_a_plot("regrettable_attrition_signed_error.png") | |
dev.off() | |
# Plot regrettable attrition signed errors of annual average monthly heads ---- | |
signed_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed error of four annual regrettable attrition approximations" | |
, paste0( | |
"Figures are differences in attrition rate per 100 full employee years.\n" | |
, "Based on annual average monthly headcount snapshots." | |
) | |
, xstr = "calendar_year" | |
, ystr = "regrettable_rate_signed_error" | |
) | |
save_a_plot("regrettable_attrition_signed_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot regrettable attrition rate signed percent errors ---- | |
signed_p_error_plot_template( | |
annual_snapshots | |
, "Signed % error of four annual regrettable attrition approximations" | |
, "Figures are % difference between approximate and exact attrition rate" | |
, xstr = "calendar_year" | |
, ystr = "regrettable_rate_signed_p_error" | |
) | |
save_a_plot("regrettable_attrition_signed_p_error.png") | |
dev.off() | |
# Plot attrition rate signed percent errors of ann. avg. monthly ---- | |
signed_p_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed % error of four annual regrettable attrition approximations" | |
, paste0( | |
"Figures are % difference between approximate and exact attrition rate.\n" | |
, "Based on annual average monthly headcount snapshots." | |
) | |
, xstr = "calendar_year" | |
, ystr = "regrettable_rate_signed_p_error" | |
) | |
save_a_plot("regrettable_attrition_signed_p_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot headcount cost signed errors ---- | |
signed_error_plot_template( | |
annual_snapshots | |
, "Signed error of four annual headcount cost approximations" | |
, "Figures are differences in millions of dollars" | |
, xstr = "calendar_year" | |
, ystr = "headcount_cost_signed_error_mm" | |
) | |
save_a_plot("headcount_cost_signed_error.png") | |
dev.off() | |
# Plot headcount cost signed errors for annual average monthly headcount ---- | |
signed_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed error of four annual headcount cost approximations" | |
, paste0("Figures are differences in millions of dollars\n" | |
, "Based on annual average monthly headcount snapshots.") | |
, xstr = "calendar_year" | |
, ystr = "headcount_cost_signed_error_mm" | |
) | |
save_a_plot("headcount_cost_signed_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot headcount budget margin error ---- | |
signed_p_error_plot_template( | |
annual_snapshots | |
, "Signed error of four approximations of headcount budget share" | |
, "Figures are signed differences from exact headcount budget as % of total" | |
, xstr = "calendar_year" | |
, ystr = "headcount_budget_margin_error_mm" | |
) | |
save_a_plot("headcount_budget_share_signed_error.png") | |
dev.off() | |
# Plot headcount budget margin error ---- | |
signed_p_error_plot_template( | |
annual_snapshots | |
, "Signed error of four approximations of headcount budget share" | |
, "Figures are signed differences from exact headcount budget as % of total" | |
, xstr = "calendar_year" | |
, ystr = "headcount_budget_margin_error_mm" | |
) | |
save_a_plot("headcount_budget_share_signed_error.png") | |
dev.off() | |
# Plot headcount budget margin error ---- | |
signed_p_error_plot_template( | |
ann_avg_monthly_snapshots | |
, "Signed error of four approximations of headcount budget share" | |
, paste0( | |
"Figures are signed differences from exact headcount budget as % of total.\n" | |
, "Based on annual average monthly headcount snapshots." | |
) | |
, xstr = "calendar_year" | |
, ystr = "headcount_budget_margin_error_mm" | |
) | |
save_a_plot("headcount_budget_share_signed_error_ann_avg_monthly.png") | |
dev.off() | |
# Plot comparison of trailing-13-month average to monthly average headcount ---- | |
bind_t13_ann_avg <- ann_avg_monthly_snapshots %>% | |
dplyr::filter(headcount_type == "End") %>% | |
dplyr::select(-headcount_type) %>% | |
dplyr::mutate(averaging_method = "Annual average") %>% | |
dplyr::bind_rows(t13avg_month_end %>% | |
dplyr::mutate(averaging_method = "Trailing 13 months")) | |
ggplot2::ggplot(data = bind_t13_ann_avg) + | |
ggplot2::aes(x = calendar_year | |
, y = headcount_signed_error | |
, linetype = averaging_method) + | |
ggplot2::geom_line(color = "purple") + | |
ggplot2::facet_wrap( | |
. ~ factor(growth_pattern, levels = c("Linear growth" | |
, "Exponential growth" | |
, "Logistic growth")) | |
) + | |
ggplot2::geom_hline(aes(yintercept = 0) | |
, linetype = "dashed", color = "black") + | |
ggplot2::labs( | |
title = "Signed error of two annual headcount approximations" | |
, subtitle = paste0("Annual average of month-end headcounts\n", | |
"VS. trailing 13-month average of month-end headcounts") | |
, x = NULL | |
, y = NULL | |
, color = "Averaging method" | |
) | |
save_a_plot("avg_end_of_month_vs_t13avg_end_of_month.png") | |
dev.off() | |
# Plot comparison of trailing 13-month average to mid-year ---- | |
bind_t13_mid_year <- annual_snapshots %>% | |
dplyr::filter(headcount_type == "Mid") %>% | |
dplyr::rename( | |
headcount_signed_error_mid_year = headcount_signed_error | |
) %>% | |
dplyr::select(growth_pattern | |
, calendar_year | |
, headcount_signed_error_mid_year) %>% | |
dplyr::left_join( | |
t13avg_month_end %>% | |
dplyr::rename( | |
headcount_signed_error_t13avg = headcount_signed_error | |
) %>% | |
dplyr::select(growth_pattern | |
, calendar_year | |
, headcount_signed_error_t13avg) | |
) | |
ggplot2::ggplot(data = bind_t13_mid_year) + | |
ggplot2::aes(x = calendar_year) + | |
ggplot2::geom_line(aes(y = headcount_signed_error_t13avg) | |
, color = "purple" | |
, linetype = "dashed") + | |
ggplot2::geom_line(aes(y = headcount_signed_error_mid_year) | |
, color = "green3") + | |
ggplot2::facet_wrap( | |
. ~ factor(growth_pattern, levels = c("Linear growth" | |
, "Exponential growth" | |
, "Logistic growth")) | |
) + | |
ggplot2::geom_hline(aes(yintercept = 0) | |
, linetype = "dashed", color = "black") + | |
ggplot2::labs( | |
title = "Signed error of two annual headcount approximations" | |
, subtitle = paste0( | |
"Annual mid-year (green)\n", | |
"VS. trailing 13-month average month-end (purple dashed)" | |
) | |
, x = NULL | |
, y = NULL | |
) | |
save_a_plot("mid_year_vs_t13avg_end_of_month.png") | |
dev.off() | |
# Plot trailing 13-month average versus average of mid-month ---- | |
bind_t13_mid_month <- ann_avg_monthly_snapshots %>% | |
dplyr::filter(headcount_type == "Mid") %>% | |
dplyr::rename( | |
headcount_signed_error_mid_month = headcount_signed_error | |
) %>% | |
dplyr::select(growth_pattern | |
, calendar_year | |
, headcount_signed_error_mid_month) %>% | |
dplyr::left_join( | |
t13avg_month_end %>% | |
dplyr::rename( | |
headcount_signed_error_t13avg = headcount_signed_error | |
) %>% | |
dplyr::select(growth_pattern | |
, calendar_year | |
, headcount_signed_error_t13avg) | |
) | |
ggplot2::ggplot(data = bind_t13_mid_month) + | |
ggplot2::aes(x = calendar_year) + | |
ggplot2::geom_line(aes(y = headcount_signed_error_t13avg) | |
, color = "purple" | |
, linetype = "dashed") + | |
ggplot2::geom_line(aes(y = headcount_signed_error_mid_month) | |
, color = "green3") + | |
ggplot2::facet_wrap( | |
. ~ factor(growth_pattern, levels = c("Linear growth" | |
, "Exponential growth" | |
, "Logistic growth")) | |
) + | |
ggplot2::geom_hline(aes(yintercept = 0) | |
, linetype = "dashed", color = "black") + | |
ggplot2::labs( | |
title = "Signed error of two annual headcount approximations" | |
, subtitle = paste0( | |
"Annual average mid-month (green)\n" | |
, "VS. trailing 13-month average month-end (purple dashed)" | |
) | |
, x = NULL | |
, y = NULL | |
) | |
save_a_plot("avg_mid_month_vs_t13avg_end_of_month.png") | |
dev.off() | |
# Create toy example ---- | |
days_in_2018 <- length(seq.Date(as.Date("2018-01-01") | |
, as.Date("2018-12-31") | |
, by = "day")) | |
toy_example <- tibble::tribble( | |
~employee, ~start_date, ~end_date, | |
"Amad", "2018-01-01", NA, | |
"Janice", "1997-04-15", "2018-07-04", | |
"Brian", "2017-05-15", "2018-12-08", | |
"Vivian", "2018-05-17", NA, | |
"Azariannah", "2018-05-17", "2018-11-11" | |
) %>% | |
# Note that this code doesn't generalize to case when attrition | |
# can occur on the starting or beginning date of the period. | |
dplyr::mutate_at(vars(contains("date")), as.Date) %>% | |
dplyr::mutate( | |
start_date_2018 = if_else(start_date < "2018-01-01" | |
, as.Date("2018-01-01") | |
, start_date) | |
, end_date_2018 = coalesce(end_date, as.Date("2018-12-31")) | |
, in_start_count = start_date_2018 == "2018-01-01" | |
, in_mid_count = (start_date_2018 <= "2018-07-02" | |
& end_date_2018 >= "2018-07-02") | |
, in_end_count = end_date_2018 == "2018-12-31" | |
, employee_days = as.numeric(end_date_2018 - start_date_2018) + 1 | |
, terminated = end_date_2018 < "2018-12-31" | |
) | |
toy_example_aggregate <- toy_example %>% | |
dplyr::summarize_at(vars(starts_with("in_") | |
, employee_days | |
, terminated) | |
, sum) %>% | |
dplyr::mutate(start_end_avg = 0.5 * (in_start_count + in_end_count)) %>% | |
dplyr::mutate_at(vars(starts_with("in_"), start_end_avg) | |
, funs(attrition = terminated / .)) %>% | |
dplyr::mutate(employee_years = employee_days / days_in_2018 | |
, employee_years_attrition = terminated / employee_years) %>% | |
dplyr::select(terminated | |
, starts_with("in_") | |
, starts_with("start_end") | |
, starts_with("employee_years")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment