Skip to content

Instantly share code, notes, and snippets.

@ikashnitsky
Last active December 9, 2019 00:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikashnitsky/654965cb971f3a11928806c4d0a0ef23 to your computer and use it in GitHub Desktop.
Save ikashnitsky/654965cb971f3a11928806c4d0a0ef23 to your computer and use it in GitHub Desktop.
Median age of the US living persons with a specific name -- https://twitter.com/ikashnitsky/status/1201770729282424832
#===============================================================================
# 2019-12-08 -- digital dem
# Lab 1 -- Popularity of the US names
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com
#===============================================================================
# Data: https://www.ssa.gov/oact/babynames/limits.html
# Post: https://fivethirtyeight.com/features/how-to-tell-someones-age-when-all-you-know-is-her-name
# Gist: https://gist.github.com/ikashnitsky/654965cb971f3a11928806c4d0a0ef23
# Tweet: https://twitter.com/ikashnitsky/status/1201770729282424832
library(tidyverse)
library(magrittr)
library(patchwork)
library(paletteer)
library(hrbrthemes); import_roboto_condensed()
options(scipen = 999)
theme_set(ggdark::dark_theme_minimal(base_family = font_rc))
# read the data -----------------------------------------------------------
base_url <- "https://ikashnitsky.github.io/share/1912-us-names/"
# US names
t <- tempfile()
curl::curl_download(paste0(base_url, "names.rda"), destfile = t)
load(t)
# US life tables (assume English mortality before 1933)
t <- tempfile()
curl::curl_download(paste0(base_url, "surv.rda"), destfile = t)
load(t)
# join names and survival data
df <- names %>%
left_join(surv, by = c("year", "age", "sex")) %>%
replace_na(list(lx = 0)) %>%
# calculate number of survivors
mutate(n_surv = value %>% multiply_by(lx))
# calculate median age for living name holders
df_med <- df %>%
group_by(name, sex) %>%
summarise(amed = matrixStats::weightedMedian(age, n_surv)) %>%
ungroup()
# the plotting function ---------------------------------------------------
plot_name_surivors <- function(choose_name = "Anna", choose_sex = "f") {
choose_sex <- tolower(choose_sex)
# filter out median age for the chosen name
median_age <- df_med %>%
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>%
pull(amed)
# helper to align label
max_y <- df %>%
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>%
pull(value) %>%
max(na.rm = TRUE)
# plot
df %>%
filter(name %>% is_in(choose_name), sex %>% is_in(choose_sex)) %>%
ggplot(aes(year))+
geom_col(aes(y = n_surv, fill = lx %>% multiply_by(100)),
color = NA, width = .75, position = position_nudge(.5))+
geom_step(aes(y = value), color = "cyan")+
geom_vline(xintercept = 2019 %>% subtract(median_age))+
annotate(
"text",
x = 2019 %>% subtract(median_age),
y = max_y,
label = paste(
"Median age of living\npersons named\n",
choose_name, ":", median_age %>% round(1)
),
vjust = 1, hjust = 1.1, lineheight = .9, fontface = 2,
family = font_rc
)+
scale_fill_viridis_c(
option = "B",
guide = guide_colorbar(
barwidth = 15, barheight = .5
),
limits = c(0, 100)
)+
coord_cartesian(xlim = c(1880, 2014))+
theme(legend.position = "bottom",
plot.title = element_text(family = "mono", size = 20),
axis.title.y = element_text(color = "cyan"))+
labs(title = "How old is a person named",
x = NULL,
fill = "Proportion\nliving, %",
caption = "@ikashnitsky",
y = paste0("Number of persons born each year") )+
annotate(
"text", x = 1881, y = max_y %>% multiply_by(.8),
label = choose_name, family = "mono", fontface = 2,
size = 16, color = "white", alpha = .2,
hjust = 0, vjust = 1
)
}
# try plotting
plot_name_surivors()
plot_name_surivors(choose_name = "Sophia")
plot_name_surivors(choose_name = "Ilya", choose_sex = "m")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment