Skip to content

Instantly share code, notes, and snippets.

@lukeholman
Created August 4, 2018 01:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lukeholman/01b47df8fd72bb68454b713539e04db7 to your computer and use it in GitHub Desktop.
Save lukeholman/01b47df8fd72bb68454b713539e04db7 to your computer and use it in GitHub Desktop.
Quick model about name-gender associations
library(ggplot2)
# Here, I use 'Kim' as shorthand for all names that differ in gender
# association between the local and immigrant population.
# For simplicity I assume Kim is an equally common name in both countries.
#
# Build the full factorial grid of parameter combinations; each row of
# `parameters` is one scenario.
parameters <- expand.grid(
  # True proportion of locally-born people named Kim
  p_kim_local = c(0.001, 0.01, 0.05, 0.1),
  # True proportion of males among foreign-born people named Kim
  p_male_kim_immigrants = 0.90,
  # True proportion of males among locally-born people named Kim
  p_male_kim_locals = 0.10,
  # Worldwide estimate for gender of people named Kim from Genderize.io
  p_male_kim_GENDERIZE = seq(0.1, 0.9, length.out = 11),
  # True proportion of researchers who are immigrants
  p_immigrants = seq(0, 0.5, length.out = 11),
  # True proportion of males among immigrant researchers not named Kim
  p_immigrants_male = 0.8,
  # True proportion of males among non-immigrant researchers not named Kim
  p_residents_male = 0.6
)

# Kim is assumed to be equally common among foreign-born and locally-born
# people, so the foreign frequency is simply a copy of the local one.
parameters$p_kim_foreign <- parameters$p_kim_local
# Calculate the real proportion of males, across immigrants and non-immigrants
# in the focal country.
# Each term is (share of researchers in the origin group) *
# (share of that group with / without the name Kim) *
# (true proportion of males in that subgroup).
# Immigrant terms use p_kim_foreign and non-immigrant terms use p_kim_local,
# so the four subgroup weights always sum to 1, even if the two name
# frequencies are ever set to different values.
parameters$real_pMale <- with(
  parameters,
  # pMale among non-immigrants named Kim
  (1 - p_immigrants) * p_kim_local * p_male_kim_locals +
    # pMale among immigrants named Kim
    p_immigrants * p_kim_foreign * p_male_kim_immigrants +
    # pMale among non-immigrants NOT named Kim
    (1 - p_immigrants) * (1 - p_kim_local) * p_residents_male +
    # pMale among immigrants NOT named Kim
    p_immigrants * (1 - p_kim_foreign) * p_immigrants_male
)
# Estimate pMale, if we ignore country and use the world-wide estimate from
# Genderize.io.
# Everyone named Kim (immigrant or not) is assigned the same worldwide
# proportion of males, p_male_kim_GENDERIZE; people not named Kim keep their
# true proportions.
# Immigrant terms use p_kim_foreign and non-immigrant terms use p_kim_local,
# so the four subgroup weights always sum to 1.
parameters$estimated_pMale_worldwide <- with(
  parameters,
  # pMale among non-immigrants named Kim
  (1 - p_immigrants) * p_kim_local * p_male_kim_GENDERIZE +
    # pMale among immigrants named Kim
    p_immigrants * p_kim_foreign * p_male_kim_GENDERIZE +
    # pMale among non-immigrants NOT named Kim
    (1 - p_immigrants) * (1 - p_kim_local) * p_residents_male +
    # pMale among immigrants NOT named Kim
    p_immigrants * (1 - p_kim_foreign) * p_immigrants_male
)
# Estimate pMale, if we DO NOT ignore country, and use the country-specific
# estimate from Genderize.io.
# I assume this ends up mis-classifying more immigrants, but improves
# classification of non-immigrants: everyone named Kim, including immigrants,
# is assigned the local proportion of males, p_male_kim_locals.
# Immigrant terms use p_kim_foreign and non-immigrant terms use p_kim_local,
# so the four subgroup weights always sum to 1.
parameters$estimated_pMale_local <- with(
  parameters,
  # pMale among non-immigrants named Kim
  (1 - p_immigrants) * p_kim_local * p_male_kim_locals +
    # pMale among immigrants named Kim (deliberately given the local rate)
    p_immigrants * p_kim_foreign * p_male_kim_locals +
    # pMale among non-immigrants NOT named Kim
    (1 - p_immigrants) * (1 - p_kim_local) * p_residents_male +
    # pMale among immigrants NOT named Kim
    p_immigrants * (1 - p_kim_foreign) * p_immigrants_male
)
# Compare the two estimation strategies by their absolute error (in percentage
# points) when estimating the % men: worldwide error minus local error.
# Positive numbers mean it is better to use the local name-gender
# associations, as done by Holman et al.
parameters$difference_in_error <- with(parameters, {
  worldwide_error <- abs(estimated_pMale_worldwide - real_pMale)
  local_error <- abs(estimated_pMale_local - real_pMale)
  100 * worldwide_error - 100 * local_error
})
# Heat map of difference_in_error over the immigrant fraction (x) and the
# worldwide Genderize.io estimate for Kim (y), one facet per value of
# p_kim_local.
# The fill is the worldwide error minus the local error, so with the default
# diverging palette of scale_fill_gradient2 (midpoint 0):
# Blue areas are places where it is best to use the country-specific
# associations.
# Red areas show where it is best to ignore them, and use the world-wide
# estimate.
ggplot(
  parameters,
  aes(
    x = p_immigrants,
    y = p_male_kim_GENDERIZE,
    fill = difference_in_error
  )
) +
  geom_tile() +
  # The legend names the plotted quantity: a difference between two absolute
  # errors, expressed in percentage points.
  scale_fill_gradient2(name = "Difference in absolute\nerror (% points)") +
  facet_wrap(~p_kim_local) +
  xlab("Proportion of researchers who are immigrants") +
  ylab("Worldwide frequency of men\namong people named Kim") +
  labs(
    title = "Blue means it's best to not ignore the country information",
    subtitle = "Facets show frequency of people named Kim"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment