Replication code for blogpost "Is the left over-represented within academia?"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### NOTE: this file assumes the Understanding Society data files
### (one <wave>_indresp.sav per wave, a-f) are in a folder called data/
## ----loadlibs------------------------------------------------------------ | |
library(rio) | |
library(dplyr) | |
library(ggplot2) | |
library(scales) | |
## Normal-approximation (Wald) confidence bound for a proportion.
##
## Returns p -/+ z * sqrt(p * (1 - p) / n), where z is the (1 - alpha / 2)
## standard-normal quantile. Vectorised over p and n. The bound is not
## clamped, so it can fall outside [0, 1].
##
## Arguments:
##   p     estimated proportion(s)
##   n     sample size(s) behind each proportion
##   alpha two-sided significance level (0.05 gives a 95% interval)
##   type  which bound to return: "lower" (default) or "upper"
## Value: numeric vector holding the requested bound for each p.
binom.ci <- function(p, n, alpha = 0.05, type = "lower") {
  ## Reject unknown values instead of silently treating them as "upper".
  type <- match.arg(type, c("lower", "upper"))
  halfint <- qnorm(1 - alpha / 2) * sqrt(p * (1 - p) / n)
  if (type == "lower") {
    p - halfint
  } else {
    p + halfint
  }
}
## ----loaddata------------------------------------------------------------ | |
## Per-wave suffix of the cross-sectional adult main interview weight.
## Waves a-b use one suffix, c-e another and f a third; the wave letter is
## pasted in front of the suffix to obtain the actual column name.
w8var.df <- data.frame(
  l = letters[1:6],
  var = rep(c("_indinus_xw", "_indinub_xw", "_indinui_xw"),
            times = c(2, 3, 1))
)
## Build the respondent-level extract used below, or reload it from the
## data/vi_only.csv cache when a previous run has already written it.
if (!file.exists("data/vi_only.csv")) {
  ## Labels for party codes 1-13, in code order (same for every wave).
  party_labels <- c("Cons", "Lab", "LDem", "SNP", "PC", "Green", "UU",
                    "SDLP", "Alliance", "DUP", "SF", "UKIP", "BNP")
  holder <- vector("list", 6)
  for (l in letters[1:6]) {
    ## Only the trimmed extract is stored per wave; the full import is not
    ## retained once its iteration is over.
    indresp <- import(paste0("data/", l, "_indresp.sav"))

    votevar1 <- paste0(l, "_vote4")
    votevar2 <- paste0(l, "_vote3")
    occ_var <- paste0(l, "_jbisco88_cc")
    educ_var <- paste0(l, "_qfhigh_dv")
    w8var <- paste0(l, w8var.df$var[match(l, w8var.df$l)])

    ## Fail with a readable message if the wave file is not as expected.
    needed <- c("pidp", votevar1, votevar2, occ_var, educ_var, w8var)
    absent <- setdiff(needed, names(indresp))
    if (length(absent) > 0) {
      stop("Wave '", l, "' is missing column(s): ",
           paste(absent, collapse = ", "), call. = FALSE)
    }

    ### VI
    ### If they are close to a particular party, choose that
    ### if they are not, take VI
    vi_code <- ifelse(is.element(indresp[[votevar1]], 1:13),
                      indresp[[votevar1]],
                      indresp[[votevar2]])
    vi_code[!is.element(vi_code, 1:13)] <- NA
    ## Indexing by code maps 1-13 to their labels and keeps NA as NA.
    indresp$vi <- party_labels[as.integer(vi_code)]

    ### Occupation
    ### 231 = College, university and higher education teaching professionals
    ## %in% is never NA, so a missing occupation counts as "not HE" (0).
    indresp$isHE <- as.numeric(indresp[[occ_var]] %in% 231)

    ### Education
    ## NOTE(review): code 1 is treated as "has a PhD"; confirm against the
    ## qfhigh_dv codebook that it does not also cover other higher degrees.
    indresp$hasPhD <- as.numeric(indresp[[educ_var]] %in% 1)

    ### Weighting var
    indresp$w8 <- indresp[[w8var]]

    wave_df <- indresp[, c("pidp", occ_var, "isHE", "hasPhD", "vi", "w8")]
    ## The occupation column is wave-prefixed; give it a common name so the
    ## waves can be stacked.
    names(wave_df)[2] <- "occ"
    wave_df$wave <- l
    holder[[match(l, letters)]] <- wave_df
  }
  dat <- do.call("rbind", holder)
  write.csv(dat, file = "data/vi_only.csv", row.names = FALSE)
} else {
  dat <- read.csv("data/vi_only.csv")
}
## Everything downstream is about party preference, so drop respondents
## without one.
dat <- dat[!is.na(dat$vi), , drop = FALSE]
## ----plotdf-------------------------------------------------------------- | |
## Unweighted number of HE respondents (isHE == 1) with a recorded party in
## each wave; minHE / maxHE are the smallest and largest of those counts.
HE_respondents <- with(subset(dat, isHE == 1),
                       by(vi, wave, function(x) sum(!is.na(x))))
minHE <- min(HE_respondents)
maxHE <- max(HE_respondents)
## ----plotprep------------------------------------------------------------ | |
## Weighted and unweighted party shares within each wave x isHE cell, each
## with lower/upper bounds from binom.ci().
plot.df <- dat %>%
  group_by(wave, isHE, vi) %>%
  summarize(nResps = sum(w8),
            unWeightedResps = n()) %>%
  ## Regroup explicitly so the cell totals do not depend on how many
  ## grouping levels summarize() leaves behind.
  group_by(wave, isHE) %>%
  mutate(nSample = sum(nResps),
         nUnweighted = sum(unWeightedResps)) %>%
  ## Drop the grouping: everything from here on is row-wise.
  ungroup() %>%
  mutate(
    proportion = nResps / nSample,
    ## NOTE(review): n here is the sum of weights, not a respondent count;
    ## confirm the weights are scaled so that this is a sensible sample
    ## size for the interval.
    lower = binom.ci(proportion, n = nSample, type = "lower"),
    upper = binom.ci(proportion, n = nSample, type = "upper"),
    ## Unweighted alternative based on raw respondent counts.
    proportion.alt = unWeightedResps / nUnweighted,
    lower.alt = binom.ci(proportion.alt, n = nUnweighted, type = "lower"),
    upper.alt = binom.ci(proportion.alt, n = nUnweighted, type = "upper")
  )
## Turn party into an ordered factor over the six plotted parties, in
## display order; any label not listed among the levels becomes NA.
plot.df$vi <- ordered(
  plot.df$vi,
  levels = c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")
)
## Fill colours handed to scale_fill_manual(). The vector is unnamed, so the
## colours are taken in order for the party levels being plotted.
## NOTE(review): seven colours are listed but only six party levels are
## plotted, so the last one is never used - confirm it is meant as a spare.
party.cols <- c(
  "#0087DC", "#D50000", "#70147A", "#FDBB30",
  "#FFFF00", "#008142", "#99CC33"
)
## ----plot, fig = TRUE, fig.cap = ""-------------------------------------- | |
## Wave "f" only: share closest to each of the six plotted parties, split by
## isHE, with the lower/upper bounds drawn as error bars.
plot.df %>%
  dplyr::filter(wave == "f",
                vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")) %>%
  ggplot(mapping = aes(x = vi, y = proportion,
                       ymin = lower, ymax = upper,
                       fill = vi, color = factor(isHE))) +
  geom_bar(mapping = aes(alpha = factor(isHE)),
           stat = "identity",
           position = position_dodge(width = 0.9)) +
  geom_errorbar(width = 0.25,
                position = position_dodge(width = 0.9)) +
  ## The colour aesthetic only splits the bars by isHE for dodging; both
  ## groups are outlined in black and told apart by alpha instead.
  scale_color_manual(values = c("black", "black")) +
  scale_alpha_manual(values = c(0.5, 1)) +
  scale_fill_manual(values = party.cols) +
  scale_x_discrete("Party closest to...") +
  scale_y_continuous("Proportion", labels = percent) +
  labs(title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
       subtitle = "Source: Understanding Society Wave 6") +
  theme_bw() +
  theme(legend.position = "none")
## ----plotbywave, fig = TRUE, fig.cap = ""-------------------------------- | |
## Numeric wave index (a = 1, ..., f = 6) used as the facet label.
plot.df$Wave <- match(plot.df$wave, letters)
## Same chart as for the single wave, drawn once per wave in its own panel.
plot.df %>%
  dplyr::filter(vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")) %>%
  ggplot(mapping = aes(x = vi, y = proportion,
                       ymin = lower, ymax = upper,
                       fill = vi, color = factor(isHE))) +
  geom_bar(mapping = aes(alpha = factor(isHE)),
           stat = "identity",
           position = position_dodge(width = 0.9)) +
  geom_errorbar(width = 0.25,
                position = position_dodge(width = 0.9)) +
  ## The colour aesthetic only splits the bars by isHE for dodging; both
  ## groups are outlined in black and told apart by alpha instead.
  scale_color_manual(values = c("black", "black")) +
  scale_alpha_manual(values = c(0.5, 1)) +
  scale_fill_manual(values = party.cols) +
  scale_x_discrete("Party closest to...") +
  scale_y_continuous("Proportion", labels = percent) +
  labs(title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
       subtitle = "Source: Understanding Society, various waves") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~Wave)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment