Last active
December 9, 2019 00:44
-
-
Save ikashnitsky/654965cb971f3a11928806c4d0a0ef23 to your computer and use it in GitHub Desktop.
Median age of the US living persons with a specific name -- https://twitter.com/ikashnitsky/status/1201770729282424832
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#=============================================================================== | |
# 2019-12-08 -- digital dem | |
# Lab 1 -- Popularity of the US names | |
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com | |
#=============================================================================== | |
# Data: https://www.ssa.gov/oact/babynames/limits.html | |
# Post: https://fivethirtyeight.com/features/how-to-tell-someones-age-when-all-you-know-is-her-name | |
# Gist: https://gist.github.com/ikashnitsky/654965cb971f3a11928806c4d0a0ef23 | |
# Tweet: https://twitter.com/ikashnitsky/status/1201770729282424832 | |
library(tidyverse) | |
library(magrittr) | |
library(patchwork) | |
library(paletteer) | |
library(hrbrthemes); import_roboto_condensed() | |
options(scipen = 999) | |
theme_set(ggdark::dark_theme_minimal(base_family = font_rc)) | |
# read the data ----------------------------------------------------------- | |
base_url <- "https://ikashnitsky.github.io/share/1912-us-names/" | |
# US names | |
t <- tempfile() | |
curl::curl_download(paste0(base_url, "names.rda"), destfile = t) | |
load(t) | |
# US life tables (assume English mortality before 1933) | |
t <- tempfile() | |
curl::curl_download(paste0(base_url, "surv.rda"), destfile = t) | |
load(t) | |
# join names and survival data | |
df <- names %>% | |
left_join(surv, by = c("year", "age", "sex")) %>% | |
replace_na(list(lx = 0)) %>% | |
# calculate number of survivors | |
mutate(n_surv = value %>% multiply_by(lx)) | |
# calculate median age for living name holders | |
df_med <- df %>% | |
group_by(name, sex) %>% | |
summarise(amed = matrixStats::weightedMedian(age, n_surv)) %>% | |
ungroup() | |
# the plotting function --------------------------------------------------- | |
plot_name_surivors <- function(choose_name = "Anna", choose_sex = "f") { | |
choose_sex <- tolower(choose_sex) | |
# filter out median age for the chosen name | |
median_age <- df_med %>% | |
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>% | |
pull(amed) | |
# helper to align label | |
max_y <- df %>% | |
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>% | |
pull(value) %>% | |
max(na.rm = TRUE) | |
# plot | |
df %>% | |
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>% | |
ggplot(aes(year))+ | |
geom_col(aes(y = n_surv, fill = lx %>% multiply_by(100)), | |
color = NA, width = .75, position = position_nudge(.5))+ | |
geom_step(aes(y = value), color = "cyan")+ | |
geom_vline(xintercept = 2019 %>% subtract(median_age))+ | |
annotate( | |
"text", | |
x = 2019 %>% subtract(median_age), | |
y = max_y, | |
label = paste( | |
"Median age of living\npersons named\n", | |
choose_name, ":", median_age %>% round(1) | |
), | |
vjust = 1, hjust = 1.1, lineheight = .9, fontface = 2, | |
family = font_rc | |
)+ | |
scale_fill_viridis_c( | |
option = "B", | |
guide = guide_colorbar( | |
barwidth = 15, barheight = .5 | |
), | |
limits = c(0, 100) | |
)+ | |
coord_cartesian(xlim = c(1880, 2014))+ | |
theme(legend.position = "bottom", | |
plot.title = element_text(family = "mono", size = 20), | |
axis.title.y = element_text(color = "cyan"))+ | |
labs(title = "How old is a person named", | |
x = NULL, | |
fill = "Proportion\nliving, %", | |
caption = "@ikashnitsky", | |
y = paste0("Number of persons born each year") )+ | |
annotate( | |
"text", x = 1881, y = max_y %>% multiply_by(.8), | |
label = choose_name, family = "mono", fontface = 2, | |
size = 16, color = "white", alpha = .2, | |
hjust = 0, vjust = 1 | |
) | |
} | |
# try plotting | |
plot_name_surivors() | |
plot_name_surivors(choose_name = "Sophia") | |
plot_name_surivors(choose_name = "Ilya", choose_sex = "m") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment