Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created March 8, 2017 21:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save chrishanretty/e03cd5deaa0f5c3fbe6f9b733bc4ddb2 to your computer and use it in GitHub Desktop.
Save chrishanretty/e03cd5deaa0f5c3fbe6f9b733bc4ddb2 to your computer and use it in GitHub Desktop.
Replication code for blogpost "Is the left over-represented within academia?"
### NOTE: this file assumes you have Understanding Society data
### in a folder called data/
## ----loadlibs------------------------------------------------------------
library(rio)
library(dplyr)
library(ggplot2)
library(scales)
## Normal-approximation confidence bound for a proportion.
##   p     : numeric vector of proportions
##   n     : numeric vector of sample sizes (recycled against p)
##   alpha : significance level; the two-sided interval covers 1 - alpha
##   type  : which bound to return, "lower" or "upper"
## Returns p minus (lower) or plus (upper) the half-width
## qnorm(1 - alpha/2) * sqrt(p * (1 - p) / n). The bound is not
## truncated to [0, 1].
binom.ci <- function(p, n, alpha = 0.05, type = "lower") {
  ## Validate `type`: previously anything other than exactly "lower"
  ## (including typos) silently returned the upper bound.
  type <- match.arg(type, c("lower", "upper"))
  halfint <- qnorm(1 - alpha / 2) * sqrt(p * (1 - p) / n)
  if (type == "lower") {
    retval <- p - halfint
  } else {
    retval <- p + halfint
  }
  return(retval)
}
## ----loaddata------------------------------------------------------------
## Cross-sectional adult main interview weight for each wave: the column
## name in the wave file is the wave letter followed by `var`.
## stringsAsFactors = FALSE keeps `var` as character on every R version.
w8var.df <- data.frame(l = letters[1:6],
                       var = c("_indinus_xw", "_indinus_xw", "_indinub_xw",
                               "_indinub_xw", "_indinub_xw", "_indinui_xw"),
                       stringsAsFactors = FALSE)

## Read one wave's individual-response file and reduce it to the columns
## used below.
##   l : wave letter ("a" to "f"); prefixes both the file name and the
##       variable names inside it
## Returns a data frame with columns pidp, occ, isHE, hasPhD, vi, w8, wave.
## Doing this in a function means only the reduced data frame survives;
## the full wave file is released as soon as the function returns.
read_wave <- function(l) {
  indresp <- import(paste0("data/", l, "_indresp.sav"))

  ### VI
  ### If they are close to a particular party, choose that
  ### if they are not, take VI
  votevar1 <- paste0(l, "_vote4")
  votevar2 <- paste0(l, "_vote3")
  indresp$vi <- ifelse(is.element(indresp[, votevar1], 1:13),
                       indresp[, votevar1],
                       indresp[, votevar2])
  ## Anything outside the 13 party codes is treated as missing.
  indresp$vi[!is.element(indresp$vi, 1:13)] <- NA
  indresp$vi <- dplyr::recode(indresp$vi,
                              `1` = 'Cons',
                              `2` = 'Lab',
                              `3` = 'LDem',
                              `4` = 'SNP',
                              `5` = 'PC',
                              `6` = 'Green',
                              `7` = 'UU',
                              `8` = 'SDLP',
                              `9` = 'Alliance',
                              `10` = 'DUP',
                              `11` = 'SF',
                              `12` = 'UKIP',
                              `13` = 'BNP')

  ### Occupation
  ### 231 = College, university and higher education teaching professionals
  occ_var <- paste0(l, "_jbisco88_cc")
  indresp$isHE <- 0
  indresp$isHE[which(indresp[, occ_var] == 231)] <- 1

  ### Education
  ## NOTE(review): code 1 of <wave>_qfhigh_dv is treated as "has a PhD";
  ## confirm against the codebook that the category is that narrow.
  educ_var <- paste0(l, "_qfhigh_dv")
  indresp$hasPhD <- 0
  indresp$hasPhD[which(indresp[, educ_var] == 1)] <- 1

  ### Weighting var
  w8var <- paste0(l, w8var.df$var[match(l, w8var.df$l)])
  indresp$w8 <- indresp[, w8var]

  df <- indresp[, c("pidp", occ_var, "isHE", "hasPhD", "vi", "w8")]
  names(df)[2] <- "occ"
  df$wave <- l
  df
}

## Build the stacked six-wave extract once and cache it as CSV; later runs
## read the cache instead of the raw wave files.
if (!file.exists("data/vi_only.csv")) {
  holder <- lapply(letters[1:6], read_wave)
  dat <- do.call("rbind", holder)
  write.csv(dat, file = "data/vi_only.csv", row.names = FALSE)
} else {
  ## stringsAsFactors = FALSE so vi and wave are character here too,
  ## matching the freshly built data frame on R < 4.0.
  dat <- read.csv("data/vi_only.csv", stringsAsFactors = FALSE)
}
## Keep only respondents with a recorded party.
dat <- subset(dat, !is.na(vi))
## ----plotdf--------------------------------------------------------------
## Unweighted number of HE respondents with a recorded party in each wave,
## plus the smallest and largest of those per-wave counts.
## `!is.na(x)` is never NA, so no na.rm is needed (and the reassignable
## shorthand `T` is avoided).
HE_respondents <- with(subset(dat, isHE == 1),
                       by(vi, wave, function(x) sum(!is.na(x))))
minHE <- min(HE_respondents)
maxHE <- max(HE_respondents)
## ----plotprep------------------------------------------------------------
## One row per wave x isHE x party, holding the weighted count (nResps) and
## the unweighted count (unWeightedResps), together with the wave x isHE
## totals of each (nSample, nUnweighted).
## The trailing ungroup() stops the wave/isHE grouping from silently
## carrying over into later operations on plot.df.
plot.df <- dat %>%
  group_by(wave, isHE, vi) %>%
  summarize(nResps = sum(w8),
            unWeightedResps = n()) %>%
  group_by(wave, isHE) %>%
  mutate(nSample = sum(nResps),
         nUnweighted = sum(unWeightedResps)) %>%
  ungroup()

## Weighted share of each party within its wave x isHE cell, with bounds.
## NOTE(review): the interval uses the sum of weights as n; that matches
## the respondent count only if weights average 1 within the cell.
## Confirm this is intended.
plot.df$proportion <- plot.df$nResps / plot.df$nSample
plot.df$lower <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "lower")
plot.df$upper <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "upper")

## Same quantities from raw (unweighted) respondent counts.
plot.df$proportion.alt <- plot.df$unWeightedResps / plot.df$nUnweighted
plot.df$lower.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "lower")
plot.df$upper.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "upper")

## Fix the display order of the six plotted parties; any other party
## becomes NA here.
plot.df$vi <- factor(plot.df$vi,
                     levels = c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green"),
                     ordered = TRUE)

## Fill colours, unnamed, so they are taken in the order of the factor
## levels above.
## NOTE(review): seven colours are listed for six levels, so the last one
## looks unused. Check the sixth is the intended colour for "Green".
party.cols <- c("#0087DC",
                "#D50000",
                "#70147A",
                "#FDBB30",
                "#FFFF00",
                "#008142",
                "#99CC33")
## ----plot, fig = TRUE, fig.cap = ""--------------------------------------
## Wave "f" only: weighted share for each of the six main parties, with the
## isHE == 0 and isHE == 1 bars dodged side by side and told apart by
## transparency (0.5 vs 1). Error bars run from `lower` to `upper`.
plot.df %>%
  subset(wave == "f" & vi %in% c("Cons","Lab","UKIP","LDem","SNP","Green")) %>%
  ggplot(aes(x = vi, y = proportion,
             ymin = lower, ymax = upper,
             fill = vi, color = factor(isHE))) +
  geom_col(aes(alpha = factor(isHE)),
           position = position_dodge(width = 0.9)) +
  geom_errorbar(width = 0.25,
                position = position_dodge(width = 0.9)) +
  scale_x_discrete("Party closest to...") +
  scale_y_continuous("Proportion", labels = percent) +
  scale_fill_manual(values = party.cols) +
  ## Both outline colours are black; the colour aesthetic is only there
  ## to split the bars and error bars by isHE for dodging.
  scale_color_manual(values = c("black", "black")) +
  scale_alpha_manual(values = c(0.5, 1)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
       subtitle = "Source: Understanding Society Wave 6")
## ----plotbywave, fig = TRUE, fig.cap = ""--------------------------------
## Numeric wave index (a = 1, ..., f = 6) used as the facet variable.
plot.df$Wave <- match(plot.df$wave, letters)

## Same chart as the single-wave figure, drawn for every wave with one
## panel per wave.
plot.df %>%
  subset(vi %in% c("Cons","Lab","UKIP","LDem","SNP","Green")) %>%
  ggplot(aes(x = vi, y = proportion,
             ymin = lower, ymax = upper,
             fill = vi, color = factor(isHE))) +
  geom_col(aes(alpha = factor(isHE)),
           position = position_dodge(width = 0.9)) +
  geom_errorbar(width = 0.25,
                position = position_dodge(width = 0.9)) +
  scale_x_discrete("Party closest to...") +
  scale_y_continuous("Proportion", labels = percent) +
  scale_fill_manual(values = party.cols) +
  scale_color_manual(values = c("black", "black")) +
  scale_alpha_manual(values = c(0.5, 1)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
       subtitle = "Source: Understanding Society, various waves") +
  facet_wrap(~ Wave)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment