Skip to content

Instantly share code, notes, and snippets.

@arvi1000
Created March 28, 2019 22:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arvi1000/abfca5bd21237ceb85e65db5df709e0c to your computer and use it in GitHub Desktop.
Save arvi1000/abfca5bd21237ceb85e65db5df709e0c to your computer and use it in GitHub Desktop.
library(tidyverse)
# read data
# NOTE(review): the steps below treat row 1 of the parsed file as the real
# column headers and rows 2-13 as the observations -- confirm against the CSV.
dat_raw <- read_csv('http://infographics.economist.com/databank/Economist_women-research.csv')

# drop stuff that's not the data
dat <- dat_raw[2:13, ]

# fix names: take the header row, lower-case it, collapse the column whose
# label contains '%' (and everything after it) to 'inventors', and shorten
# 'science'/'sciences' to 'sci'
names(dat) <-
  dat_raw[1, ] %>%
  t %>%
  unname %>%
  tolower %>%
  sub('%.*', 'inventors', .) %>%
  gsub('science(s|\\b)?', 'sci', .)

# convert to long format. melt() forever, by the way -- sorry, tidyverse.
# data.table's melt() method is for data.tables; passing a tibble relies on a
# deprecated redirect to reshape2. Convert explicitly, spell out `id.vars`
# (no partial argument matching), then go back to a plain data.frame.
dat <-
  dat %>%
  data.table::as.data.table() %>%
  data.table::melt(id.vars = 'country') %>%
  as.data.frame()

# measure columns were read as text, so the melted values need converting
dat$value <- as.numeric(dat$value)
# a bunch of mutations
dat <-
  dat %>%
  mutate(
    # group countries: keep the three highlighted ones, lump the rest into
    # 'Others*'; the level order is the reverse of Japan/Others*/Brazil/Portugal
    country2 =
      ifelse(country %in% c('Portugal', 'Japan', 'Brazil'),
             country, 'Others*') %>%
      factor(levels = rev(c('Japan', 'Others*', 'Brazil', 'Portugal')),
             ordered = TRUE),
    # field of study to factor
    variable = as.factor(variable)
  ) %>%
  # add offset for same-value collisions: within each (field, value) pair,
  # countries get 0, 1, 2, ... so coincident points can be nudged apart
  group_by(variable, value) %>%
  mutate(offset = dense_rank(country) - 1) %>%
  # the grouping is only needed for the offset; don't leak it downstream
  ungroup()
# economist-type colors, courtesy ggthemes
# Look up a color by name in the ggthemes economist `fg` table and return the
# first matching `value` (NA when nothing matches).
get_econ_clr <- function(clr_name) {
  econ_fg <- ggthemes::ggthemes_data[["economist"]]$fg
  matched <- filter(econ_fg, name == clr_name)
  matched$value[1]
}

# legend label -> economist color name, resolved to a named color vector
clr_lookup <- list(Japan = 'pink',
                   Portugal = 'dark blue',
                   Brazil = 'light blue',
                   `Others*` = 'gray')
plot_clrs <- unlist(lapply(clr_lookup, get_econ_clr))
# footnote text for the caption: the countries lumped into 'Others*'.
# (The caption below referenced `other_countries`, which was never defined.)
other_countries <-
  dat$country[!is.na(dat$country) & dat$country2 == 'Others*'] %>%
  unique() %>%
  sort() %>%
  paste(collapse = ', ')

# make the plot. all the hard coded size values are cherry-picked/eyeballed
# for a specific plot window size
dat %>%
  # drop this category
  filter(variable != 'inventors') %>%
  # this weird aes(x) call is so dots will stack up: the field's factor code
  # gives the base position and the collision offset nudges ties apart
  ggplot(aes(x = as.numeric(variable) + offset / 7, y = value)) +
  # points and a 50% reference line
  geom_point(size = 1.5, aes(color = country2)) +
  geom_hline(yintercept = .5) +
  # make it wide not tall
  coord_flip() +
  scale_color_manual(values = plot_clrs) +
  # x is numeric (see aes above), so put the field names back on as labels
  scale_x_continuous(breaks = seq_along(levels(dat$variable)),
                     labels = levels(dat$variable),
                     expand = c(.05, .05)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1),
                     labels = scales::percent) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 3.5),
        panel.grid.minor.y = element_blank(),
        plot.caption = element_text(color = 'grey50')) +
  labs(title = "Share of published researchers who are women, by field & country",
       caption = paste0("*", other_countries),
       x = NULL, y = NULL, color = NULL)
@arvi1000
Copy link
Author

Result:

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment