Skip to content

Instantly share code, notes, and snippets.

@roblanf
Last active January 24, 2021 08:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save roblanf/241978afe4237b3b605f to your computer and use it in GitHub Desktop.
Save roblanf/241978afe4237b3b605f to your computer and use it in GitHub Desktop.
code to make two basic plots showing gender balance in an institution. An example dataset is here: https://gist.github.com/roblanf/f0f9e331adc5aae84fb1. Full description here: www.robertlanfear.com/blog/files/visualising_gender_balance_R.html
library(ggplot2)
library(reshape2)
library(plyr)
# Load the raw records; the code below reads its role, gender and year columns.
all <- read.csv("genderdata.csv")
######################## Plot 1 ###############################################
# The raw data: number of men and women in each role, by year
# Roles listed in the order they should appear along the plot axis.
all_roles <- c(
  "Technical Staff", "Professional Staff", "Faculty Staff",
  "MSc student completion", "PhD student completion", "Postdoc",
  "Associate Lecturer", "Lecturer", "Senior Lecturer",
  "Associate Professor", "Professor", "Distinguished Professor",
  "Emeritus Professor"
)
# Make role a factor whose level order follows all_roles.
all[["role"]] <- factor(all[["role"]], levels = all_roles)
# Numeric sort key derived from the factor's level index (a hack, but it works).
all[["order"]] <- as.numeric(all[["role"]]) / 100
# Stacked pyramid plot, thanks to Didzis Elferts:
# http://stackoverflow.com/questions/14680075/simpler-population-pyramid-in-ggplot2
#
# Women are counted upwards from zero and men downwards (negated count), so the
# two bars for a role sit back to back; coord_flip() then lays them sideways.
#
# Each layer is given its own data subset. The old `subset = .(...)` layer
# argument was removed in ggplot2 2.0.0; on current versions it is ignored and
# both layers would count every row.
# after_stat() replaces the deprecated `..count..` notation (ggplot2 >= 3.3.0).
#
# `role` is already a factor whose levels follow all_roles, so it goes on the
# axis directly. drop = FALSE keeps roles that have no rows, which means the
# axis labels can never shift onto the wrong bar when a role is absent.
ggplot(data = all, aes(x = role, fill = gender)) +
  geom_bar(data = subset(all, gender == "woman")) +
  geom_bar(
    data = subset(all, gender == "man"),
    aes(y = after_stat(-count))
  ) +
  scale_x_discrete(drop = FALSE) +
  xlab("role") +
  coord_flip() +
  theme(text = element_text(size = 16)) +
  # Show absolute head counts on both sides of zero.
  scale_y_continuous(
    breaks = seq(-40, 40, 10),
    labels = abs(seq(-40, 40, 10))
  ) +
  scale_fill_brewer(palette = "Dark2") +
  facet_wrap(~year)
######################## Plot 2 ###############################################
# Yearly proportions of women in staff, students/postdocs, faculty
# define some groups of people to summarise the data
staff <- c("Technical Staff", "Professional Staff", "Faculty Staff")
s.p <- c("MSc student completion", "PhD student completion", "Postdoc")
faculty <- c(
  "Associate Lecturer", "Lecturer", "Senior Lecturer", "Associate Professor",
  "Professor", "Distinguished Professor", "Emeritus Professor"
)
# Label every record with the group its role belongs to; roles that appear in
# none of the three vectors keep an NA group and drop out of the table below.
all$group[all$role %in% staff] <- "staff"
all$group[all$role %in% s.p] <- "students.and.postdocs"
all$group[all$role %in% faculty] <- "faculty"
# make a summary table: one row per group/gender/year with its count in Freq
group.counts <- as.data.frame(with(all, table(group, gender, year)))
# Spread gender into columns (one count column per gender value). The table is
# already in long form, so it is cast directly with an explicit value column
# instead of going through melt() and its id/value guessing.
group.counts <- dcast(group.counts, group + year ~ gender, value.var = "Freq")
group.counts$proportion.women <-
  group.counts$woman / (group.counts$man + group.counts$woman)
# Fix the facet order used by the plot.
group.counts$group <- factor(
  group.counts$group,
  levels = c("faculty", "students.and.postdocs", "staff")
)
# year comes back from table() as a factor; convert via character so the
# original year values are recovered rather than the factor's level indices.
group.counts$year <- as.numeric(as.character(group.counts$year))
# propci: 95% confidence interval for the proportion of women, from binary
# count data.
#   r: a one-row data frame (or list) with numeric counts `woman` and `man`.
# Returns the two-element conf.int from prop.test() (lower bound, upper bound).
# When the counts are missing or sum to zero, prop.test() would stop with an
# error, so c(NA, NA) is returned instead and the other rows can still be
# processed.
propci <- function(r) {
  counts <- c(r$woman, r$man)
  if (anyNA(counts) || sum(counts) <= 0) {
    return(c(NA_real_, NA_real_))
  }
  # One-row, two-column matrix: successes (women) then failures (men).
  prop.test(matrix(counts, nrow = 1))$conf.int
}
# Append the interval bounds to every row of the summary table; the plot below
# reads them as columns V1 (ymin) and V2 (ymax).
group.counts <- adply(group.counts, 1, propci)
# Proportion of women per year, one facet per group, with interval bars.
# Constants (the 0.5 reference line, zero cap width) are set as fixed
# parameters rather than inside aes(): mapping yintercept in aes() draws one
# line per data row, and the stacked copies cancel out the requested alpha.
# Line thickness uses `linewidth` (ggplot2 >= 3.4.0), which replaces the
# deprecated `size` for line geoms; `size` is kept for points.
ggplot(
  group.counts,
  aes(x = year, y = proportion.women, ymin = V1, ymax = V2)
) +
  geom_hline(yintercept = 0.5, colour = "red", alpha = 0.5, linewidth = 2) +
  # One labelled tick for every year present in the data.
  scale_x_continuous(
    breaks = sort(unique(group.counts$year)),
    labels = as.character(sort(unique(group.counts$year)))
  ) +
  geom_point(size = 4) +
  geom_errorbar(width = 0, linewidth = 1) +
  geom_line(alpha = 0.5, linetype = "dashed", linewidth = 1) +
  ylim(0, 1) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 20)
  ) +
  facet_wrap(~group, nrow = 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment