Skip to content

Instantly share code, notes, and snippets.

@Deleetdk
Created March 6, 2021 23:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Deleetdk/c1abd13ac3e75fff9911446eefd5c383 to your computer and use it in GitHub Desktop.
Save Deleetdk/c1abd13ac3e75fff9911446eefd5c383 to your computer and use it in GitHub Desktop.
#CRAN versions don't work
#devtools::install_github("lmullen/gender")
#devtools::install_github("lmullen/genderdata")
library(rvest)
library(kirkegaard)
library(gender)
library(patchwork)
theme_set(theme_classic())
#parse the saved Facebook friends page
x <- read_html("~/Documents/facebook-emilkirkegaard/friends/friends.html")
#text of every node matched by the "._2lel" CSS class selector
#NOTE(review): presumably the class the export uses for friend names - confirm
full_names <- html_text(html_nodes(x, "._2lel"))
#first name = leading run of non-space characters (NA when there is none)
first_names <- str_extract(full_names, "^[^ ]+")
#count doesn't match count on site, but it's close enough 326/339=96%
#classify the first names with the "napp" method of the gender package
results <- gender::gender(first_names, method = "napp")
#summary statistics of the male proportion
results$proportion_male %>%
  describe()
#counts of the male proportion rounded to the nearest integer
results$proportion_male %>%
  round() %>%
  table2()
#same two summaries with the "ssa" method for comparison
results2 <- gender::gender(first_names, method = "ssa")
results2$proportion_male %>%
  describe()
results2$proportion_male %>%
  round() %>%
  table2()
#visual representations
#split proportion_male 5 ways at the cut points below and name the bands
band_breaks <- c(0, .1, .3, .7, .9, 1)
band_labels <- c("Female", "Probably female", "🤷", "Probably male", "Male")
results$male <- discretize(results$proportion_male, breaks = band_breaks)
results2$male <- discretize(results2$proportion_male, breaks = band_breaks)
levels(results$male) <- band_labels
levels(results2$male) <- band_labels

#bar chart of the share of rows in each level of `male`
#data: data frame holding the factor column `male`
#x_label: title for the x axis
#returns the ggplot object
plot_sex_bands <- function(data, x_label) {
  ggplot(data, aes(male)) +
    #after_stat() replaces the deprecated ..count.. notation
    geom_bar(aes(y = after_stat(count / sum(count)))) +
    scale_y_continuous(
      "Percent",
      labels = scales::percent,
      breaks = seq(0, 1, .1)
    ) +
    xlab(x_label)
}

#finish each panel before combining, so no layer depends on patchwork's
#rule of adding later layers to the last plot in the patchwork
plot_algorithm_1 <- plot_sex_bands(results, "According to algorithm 1")
plot_algorithm_2 <- plot_sex_bands(results2, "According to algorithm 2")
plot_algorithm_1 + plot_algorithm_2

#NOTE(review): no plot is passed, so GG_save presumably writes the most
#recently displayed plot - confirm against kirkegaard::GG_save
GG_save("~/Pictures/facebook_friends_sex.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment