Skip to content

Instantly share code, notes, and snippets.

@davidciani
Created April 14, 2025 20:32
Rapid Decline in Popularity of Names
gender name final_peak_yr peak_rank bottom_yr bottom_rank delta_years delta_rank change_rate
female Alexa 2015 32 2023 513 8 -481 -60.13
female Breanna 1999 74 2022 838 23 -764 -33.22
female Kaitlin 1995 92 2022 940 27 -848 -31.41
female Courtney 1995 17 2023 847 28 -830 -29.64
female Annabelle 2014 57 2023 321 9 -264 -29.33
female Sheena 1984 80 2014 953 30 -873 -29.10
female Jada 2001 75 2023 712 22 -637 -28.95
female Katelyn 2001 52 2022 660 21 -608 -28.95
female Marissa 1995 53 2023 815 28 -762 -27.21
male Jase 2013 89 2023 520 10 -431 -43.10
male Brendan 1999 96 2022 666 23 -570 -24.78
male Colby 2001 98 2021 553 20 -455 -22.75
male Mitchell 1994 71 2021 642 27 -571 -21.15
male Cory 1988 59 2022 747 34 -688 -20.24
male Brett 1986 69 2021 735 35 -666 -19.03
male Taylor 1993 51 2021 547 28 -496 -17.71
male Dalton 1999 86 2021 471 22 -385 -17.50
male Brad 1975 97 2021 899 46 -802 -17.43
pacman::p_load("tidyverse","fs")
# ---- Load the national name files ------------------------------------------
# Collect every path under <data_dir>/national that matches yobYYYY.txt.
# NOTE(review): `data_dir` is not defined in the visible part of this script;
# it has to exist in the session before this line runs.
national_files <- dir_ls(path(data_dir, "national"), regex = "yob\\d{4}\\.txt")

# Column names and types, applied positionally to every file.
national_spec <- cols(
  name = col_character(),
  gender = col_factor(),
  n = col_integer()
)

# Read all files into one tibble. Because `col_names` is supplied, the first
# line of each file is read as data, and `id` records the source path of each
# row in `file_name`.
national <- read_csv(
  national_files,
  col_names = names(national_spec$cols),
  col_types = national_spec,
  id = "file_name"
) |>
  mutate(
    # `file_name` is the full path, so a bare "\\d{4}" would pick up the first
    # four-digit run anywhere in it (e.g. a dated parent directory). Anchor
    # the year to the same yobYYYY.txt pattern used to select the files.
    yr = str_extract(file_name, "(?<=yob)\\d{4}(?=\\.txt)") |> as.integer(),
    gender = fct_recode(gender, male = "M", female = "F"),
    # Drops `file_name` once it has been consumed to build `yr`.
    .keep = "unused"
  ) |>
  select(yr, gender, name, n)
# Replace the raw count with a popularity rank computed separately for each
# (year, gender) pair: rank 1 is the largest `n`, and tied counts share a rank
# with no gaps after them (dense ranking).
national_w_rank <- national |>
  group_by(yr, gender) |>
  mutate(rank = dense_rank(desc(n))) |>
  ungroup() |>
  select(!n)
# For every (name, gender), find the best (numerically smallest) rank it ever
# reached. When that rank was reached in several years, keep the latest one,
# hence "final" peak year.
national_peak_yr <- national_w_rank |>
  arrange(name, gender, yr) |>
  group_by(name, gender) |>
  # Keep only the year(s) in which the name sat at its best rank ...
  filter(rank == min(rank)) |>
  # ... then the last of those rows, which is the latest year after the sort.
  filter(row_number() == n()) |>
  ungroup() |>
  select(gender, name, final_peak_yr = yr, peak_rank = rank)
# Attach each name's final peak year and peak rank to its yearly rows, then
# keep only the observations from the peak year onwards.
after_peak <- national_w_rank |>
  left_join(national_peak_yr, by = join_by(gender, name)) |>
  filter(final_peak_yr <= yr) |>
  arrange(name, gender)
# For every (name, gender), find the worst (numerically largest) rank recorded
# from the final peak year onwards. When that rank occurs in several years,
# keep the latest one.
lowest_rank_after_peak <- after_peak |>
  arrange(name, gender, yr) |>
  group_by(name, gender) |>
  # Keep only the year(s) in which the name sat at its post-peak low ...
  filter(rank == max(rank)) |>
  # ... then the last of those rows, which is the latest year after the sort.
  filter(row_number() == n()) |>
  ungroup() |>
  select(gender, name, bottom_yr = yr, bottom_rank = rank)
# Combine peak and post-peak low for each (name, gender) and express the
# decline as ranks lost per year.
name_change_rates <- national_peak_yr |>
  left_join(lowest_rank_after_peak, by = join_by(gender, name)) |>
  mutate(
    # Years elapsed between the final peak and the post-peak low.
    delta_years = bottom_yr - final_peak_yr,
    # Negative (or zero): the bottom rank is numerically >= the peak rank.
    delta_rank = peak_rank - bottom_rank,
    # Ranks per year. A name whose low falls in its peak year has both deltas
    # equal to zero, so this evaluates to NaN for that row.
    change_rate = delta_rank / delta_years
  )
# Report the steepest declines among names that were once popular.
name_change_rates |>
  # Only names whose peak rank was better than 100 (i.e. ranks 1 to 99).
  filter(peak_rank < 100) |>
  group_by(gender) |>
  # Within each gender, keep rows whose change_rate ranks below 10, counting
  # from the most negative rate. Rows with a missing rate get an NA rank and
  # are dropped by filter().
  filter(rank(change_rate) < 10) |>
  ungroup() |>
  arrange(gender, change_rate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment