Skip to content

Instantly share code, notes, and snippets.

@GeorgeOduor
Last active October 4, 2020 09:12
Show Gist options
  • Save GeorgeOduor/86a1b5d1d3573590e72a66bd8bd8edb0 to your computer and use it in GitHub Desktop.
Save GeorgeOduor/86a1b5d1d3573590e72a66bd8bd8edb0 to your computer and use it in GitHub Desktop.
# I used these lines of code to scrape African names and their genders from the internet for one of my projects; it may not help you.
library(rvest)
library(mbanalytics)
# Listing pages of African baby names on momjunction.com; `%s` is replaced
# with the page number.
url <- "https://www.momjunction.com/baby-names/african/page/%s/"

# Scrape pages 1-20. Each page's parsed table(s) are coerced to a data frame,
# reduced to the NAMES/GENDER columns and cleaned; the per-page results are
# row-bound and the column names lower-cased (`names`, `gender`).
Allnames <- map_df(1:20, function(page) {
  page_table <- as.data.frame(
    html_table(read_html(sprintf(url, page)), fill = TRUE)
  )
  page_table %>%
    select(NAMES, GENDER) %>%
    # Drop rows whose NAMES cell matches the googletag.cmd pattern
    # (presumably ad-script text picked up by the table parser) and
    # names flagged as unisex.
    filter(!grepl("googletag.cmd.", NAMES), GENDER != "Unisex") %>%
    # "Boy" becomes "M"; every remaining value becomes "F".
    mutate(GENDER = ifelse(GENDER == "Boy", "M", "F"), NAMES = tolower(NAMES))
}) %>%
  rename_all(tolower)
# Listing pages of user-submitted Eastern African names on behindthename.com;
# `%s` is replaced with the page number.
url2 <- "https://www.behindthename.com/submit/names/usage/eastern-african/%s"

# Scrape pages 1-3 into a table with columns `names` (lower-cased) and
# `gender` ("M"/"F").
Allnames2 <- 1:3 %>%
  map(~ sprintf(url2, .) %>% read_html()) %>%
  map_df(~ tibble(
    names = html_nodes(., ".listname") %>% html_text() %>% tolower(),
    gender = html_nodes(., ".listgender") %>%
      html_text() %>%
      tolower() %>%
      trimws()
  )) %>%
  # Drop names listed under both genders whatever the order ("f & m" or
  # "m & f"); the previous exact match on "f & m" missed the reversed form.
  filter(!grepl("&", gender, fixed = TRUE)) %>%
  # Use the same upper-case "M"/"F" coding as the other scraped sources so
  # the combined table does not mix "m"/"f" with "M"/"F".
  mutate(gender = toupper(gender))
# firstnamesbaby.com listings of Kenyan names: boys are paged by initial
# letter (url3), girls by page number (url4). `%s` is the letter / page.
url3 <- 'http://www.firstnamesbaby.com/Baby-Boy-Names-Popular/Kenyan/%s/Boy/'
url4 <- 'http://www.firstnamesbaby.com/Names-By-Country/Kenyan/Girl/page%s'

# Fetch one listing page and return a table with columns `names` and
# `gender` ("M"/"F"). Shared by the boy and girl scrapes, which previously
# duplicated this pipeline verbatim.
scrape_kenyan_page <- function(page_url) {
  tables <- page_url %>%
    read_html() %>%
    html_table(fill = TRUE)

  # A page without any table would otherwise fail in separate() because the
  # X1 column is missing; contribute zero rows instead of aborting the scrape.
  if (length(tables) == 0) {
    return(tibble(names = character(0), gender = character(0)))
  }

  tables %>%
    enframe() %>%
    filter(name == 1) %>% # keep only the first table on the page
    unnest(value) %>%
    # Cells in X1 are split on "|" into a name part and a gender part.
    separate(X1, into = c("names", 'gender'), sep = "[|]") %>%
    select(names, gender) %>%
    mutate_all(.funs = trimws) %>%
    # "Boy" becomes "M"; every other value becomes "F".
    mutate(gender = ifelse(gender == "Boy", "M", "F"))
}

# One request per initial letter A-Z for boys, pages 1-11 for girls.
malenames <- map_df(LETTERS, function(letter) {
  scrape_kenyan_page(sprintf(url3, letter))
})
femalenames <- map_df(1:11, function(page) {
  scrape_kenyan_page(sprintf(url4, page))
})

# Stack both sets and lower-case the names.
Allnames3 <- malenames %>%
  rbind(femalenames) %>%
  mutate(names = tolower(names))
# Stack the three scraped tables; each carries the columns `names` and
# `gender`.
allnames <- rbind(Allnames, Allnames2, Allnames3)

# Combine the genderdata reference tables with the scraped names into a single
# name/gender lookup.
full_list <- list(
  genderdata::ipums_usa,
  genderdata::napp,
  genderdata::ssa_national
) %>%
  # These tables hold `male` / `female` columns; label each row by the larger
  # one (ties fall to "F").
  map(~ mutate(., gender = ifelse(male > female, "M", "F"))) %>%
  bind_rows(
    # ssa_state uses `M` / `F` columns instead of `male` / `female`.
    genderdata::ssa_state %>% mutate(gender = ifelse(M > `F`, "M", "F")),
    # NOTE(review): kantrowitz is bound as-is; confirm its gender coding
    # matches the "M"/"F" labels used for the other tables.
    genderdata::kantrowitz,
    # The scraped table names its column `names`, while the final select()
    # keeps `name`. Without this rename every scraped row ended up with
    # name = NA and its actual name was dropped.
    allnames %>% select(name = names, gender)
  ) %>%
  select(name, gender)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment