Skip to content

Instantly share code, notes, and snippets.

@arvi1000
Last active September 23, 2019 20:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arvi1000/bf8e2f781734c2fed14843d324c4ed98 to your computer and use it in GitHub Desktop.
Save arvi1000/bf8e2f781734c2fed14843d324c4ed98 to your computer and use it in GitHub Desktop.
library(tidyverse)
# zachary / zachery by year
# Yearly totals for the spellings "Zachary" and "Zachery" (matched
# case-insensitively) from 1950 onward. Summing `n` collapses any multiple
# rows per name/year in babynames into a single count.
z_dat <- babynames::babynames %>%
  filter(
    grepl("^zach(a|e)ry$", name, ignore.case = TRUE),
    year >= 1950
  ) %>%
  group_by(name, year) %>%
  summarise(n = sum(n)) %>%
  # summarise() only peels off the last grouping level; drop the leftover
  # `name` grouping so later steps never inherit it implicitly.
  ungroup()
# when was peak zachary? 1993
# For each spelling, report the year with the highest count
# (which.max() returns the first such year if there is a tie).
z_dat %>%
  group_by(name) %>%
  # name the result; otherwise the column is called `year[which.max(n)]`
  summarise(peak_year = year[which.max(n)])
# percent Zachery among (Zachary + Zachery) in a given year (5% in 86)
# Returns a single number: the "Zachery" count divided by the combined count
# of all rows for 1986.
z_dat %>%
  filter(year == 1986) %>%
  # pick the Zachery rows by name rather than by row position, so the result
  # does not depend on how the rows happen to be ordered
  with(., sum(n[name == "Zachery"]) / sum(n))
# plot
# Yearly counts as a line chart, one facet per spelling. Each facet has its
# own y axis (scales = "free_y"), and the legend is hidden because the facet
# strips already label the spelling.
ggplot(z_dat, aes(x = year, y = n, color = name)) +
  facet_wrap(~name, scales = "free_y") +
  geom_line() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "year",
    y = "occurrences",
    title = "USA baby name frequency by year",
    subtitle = "source: Social Security Administration"
  ) +
  # theme_light() must come before theme(), or it would reset the legend tweak
  theme_light() +
  theme(legend.position = "none")
# incidence of the less common spelling is pretty well correlated w the name's popularity
# Two stacked panels for 1980-2015, each with its own y axis:
#   all_zach    - combined count of "Zachary" and "Zachery"
#   pct_zachery - share of that combined count spelled "Zachery"
z_dat %>%
  ungroup() %>%
  filter(name %in% c("Zachary", "Zachery")) %>%
  # one row per year with a column per spelling; a year missing one spelling
  # gets NA there, matching a full outer join of the two series on `year`
  pivot_wider(names_from = name, values_from = n) %>%
  mutate(
    all_zach = Zachary + Zachery,
    pct_zachery = Zachery / all_zach
  ) %>%
  select(year, all_zach, pct_zachery) %>%
  filter(between(year, 1980, 2015)) %>%
  # reshape with tidyr: data.table::melt() on a plain data frame relied on a
  # deprecated redirect to reshape2
  pivot_longer(-year, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = year, y = value, color = variable)) +
  geom_line() +
  facet_wrap(~variable, ncol = 1, scales = "free_y") +
  labs(y = NULL) +
  theme_light() +
  theme(legend.position = "none") +
  scale_color_brewer(palette = 1, type = "qual")
@arvi1000
Copy link
Author

image

@arvi1000
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment