- garrick
- 2019-05-13
Using the tidyverse, gganimate, and babynames.
library(tidyverse)
library(gganimate)
library(babynames)
But first, handle fonts and the base plot theme
showtext::showtext_auto()
sysfonts::font_add_google("PT Sans")
sysfonts::font_add_google("PT Sans Narrow")
theme_set(
theme_minimal(base_size = 16, base_family = "PT Sans") +
theme(axis.text = element_text(family = "PT Sans Narrow"))
)
First, create a list of names and the letters they contain, without counting duplicated letters within each name.
babynames_have_letters <-
babynames %>%
distinct(name) %>%
mutate(letter = strsplit(tolower(name), character())) %>%
unnest(letter) %>%
distinct()
head(babynames_have_letters)
## # A tibble: 6 x 2
## name letter
## <chr> <chr>
## 1 Mary m
## 2 Mary a
## 3 Mary r
## 4 Mary y
## 5 Anna a
## 6 Anna n
Then join babynames
with the name-to-letters table and sum the
proportions of the population (by year and sex) having each letter in
their name.
babynames_containing <-
left_join(babynames, babynames_have_letters, by = "name") %>%
group_by(letter, year, sex) %>%
summarize(prop = sum(prop))
head(babynames_containing)
## # A tibble: 6 x 4
## # Groups: letter, year [3]
## letter year sex prop
## <chr> <dbl> <chr> <dbl>
## 1 a 1880 F 0.695
## 2 a 1880 M 0.520
## 3 a 1881 F 0.687
## 4 a 1881 M 0.517
## 5 a 1882 F 0.683
## 6 a 1882 M 0.515
Finally, use ggplot2
and gganimate
to visualize the proportion of
the population with each letter of the alphabet in their name over the
last 100 years.
gb <-
babynames_containing %>%
ungroup() %>%
filter(year >= 2017 - 100) %>%
mutate(sex = recode(sex, M = "Male", F = "Female")) %>%
ggplot() +
aes(letter, prop, fill = sex) +
geom_col(position = "identity", alpha = 0.6) +
scale_y_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
scale_fill_manual(values = c("Male" = "#00589A", "Female" = "#EB1455")) +
labs(
x = NULL,
y = "Percent of Population",
fill = NULL,
title = "How many people have the letter _____ in their name?",
subtitle = "{closest_state}",
caption = paste(
"Source: {{babynames}}, U.S. Social Security Administration",
"Chart: Garrick Aden-Buie (@grrrck)",
sep = "\n"
)
) +
theme(
legend.position = c(0.5, 0.9),
legend.direction = "horizontal",
plot.subtitle = element_text(size = rel(1.5), hjust = 0.5, margin = margin(t = 10)),
legend.background = element_rect(fill = "white", color = "white"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.text.x = element_text(vjust = 0.9, face = "bold"),
plot.caption = element_text(color = "grey40", lineheight = 1.1)
) +
ease_aes("linear") +
transition_states(year, transition_length = 1, state_length = 0, wrap = FALSE)
gb_animated <- animate(gb, nframes = 205, width = 1024, height = 512, end_pause = 5)
anim_save(
"babynames_letter_name_popularity.gif",
animation = gb_animated
)
gb_animated