Skip to content

Instantly share code, notes, and snippets.

@paldhous
Last active March 5, 2019 00:47
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save paldhous/1b0c4bf5873bbd93165d7c3a257ce524 to your computer and use it in GitHub Desktop.
R code to generate a randomly selected combination of first/last names from 1990 data
# load required packages
library(dplyr)
library(babynames)
library(readr)
# First names from the 1990 Social Security Administration baby names data.
# `rank` is the 1-based row position within each sex; the sampling step below
# draws row positions and joins back on this column.
malenames <- babynames %>%
  filter(year == 1990 & sex == "M") %>%
  mutate(rank = row_number())
femalenames <- babynames %>%
  filter(year == 1990 & sex == "F") %>%
  mutate(rank = row_number())
# Sample 10 first names per sex without replacement, weighting each row by
# its `prop` value. Row positions are drawn first, then joined back to the
# full table on `rank` (spelled out so the join key is not guessed).
male_sample <- tibble(
  rank = sample.int(nrow(malenames), size = 10, prob = malenames$prop)
) %>%
  inner_join(malenames, by = "rank")
female_sample <- tibble(
  rank = sample.int(nrow(femalenames), size = 10, prob = femalenames$prop)
) %>%
  inner_join(femalenames, by = "rank")
# Surnames from the 1990 US Census, read as a fixed-width file with four
# fields named name, pc, cum_pc and rank.
surnames <- read.fwf(
  "https://www2.census.gov/topics/genealogy/1990surnames/dist.all.last",
  widths = c(15, 6, 8, 7),
  col.names = c("name", "pc", "cum_pc", "rank"),
  # keep `name` as character on R < 4.0, where the default was a factor
  stringsAsFactors = FALSE
)
# read.fwf() preserves the space padding of fixed-width character fields;
# trim it so exported surnames do not carry trailing blanks.
surnames$name <- trimws(surnames$name)
# Sample 10 surnames for each output list without replacement, weighted by
# `pc`. Row positions are drawn and matched against the file's `rank` column.
# NOTE(review): this assumes `rank` in the file equals the row position —
# confirm against the source data.
sample_m <- tibble(
  rank = sample.int(nrow(surnames), size = 10, prob = surnames$pc)
) %>%
  inner_join(surnames, by = "rank")
sample_f <- tibble(
  rank = sample.int(nrow(surnames), size = 10, prob = surnames$pc)
) %>%
  inner_join(surnames, by = "rank")
# Pair each sampled first name with a sampled surname, row by row, and
# export one CSV per sex (missing values are written as empty strings).
female_names_10 <- tibble(
  first = female_sample[["name"]],
  last = sample_f[["name"]]
)
male_names_10 <- tibble(
  first = male_sample[["name"]],
  last = sample_m[["name"]]
)
write_csv(female_names_10, "female_names.csv", na = "")
write_csv(male_names_10, "male_names.csv", na = "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment