## chrishanretty/understanding_society.R

## understanding_society.R
### NOTE: this file assumes you have Understanding Society data
### in a folder called data/

## ----loadlibs------------------------------------------------------------
library(rio)
library(dplyr)
library(ggplot2)
library(scales)
## Normal-approximation (Wald) confidence bound for a binomial proportion.
##
## Arguments:
##   p     - observed proportion(s); vectorised
##   n     - sample size(s) behind each proportion
##   alpha - significance level; the interval has nominal coverage 1 - alpha
##   type  - "lower" or "upper": which end of the interval to return
##
## Returns p -/+ z * sqrt(p * (1 - p) / n) with z = qnorm(1 - alpha / 2).
## The bound is not clamped to [0, 1].
binom.ci <- function(p, n, alpha = 0.05, type = "lower") {
	## Reject unknown `type` values instead of silently treating anything
	## other than "lower" as a request for the upper bound.
	type <- match.arg(type, c("lower", "upper"))
	halfint <- qnorm(1 - alpha / 2) * sqrt(p * (1 - p) / n)
	if (type == "lower") {
		p - halfint
	} else {
		p + halfint
	}
}


## ----loaddata------------------------------------------------------------
## Lookup table: wave letter -> suffix of that wave's weight column
## (cross-sectional adult main interview weight).
## Waves a-b use "_indinus_xw", waves c-e "_indinub_xw", wave f "_indinui_xw".
w8var.df <- data.frame(
	l = letters[1:6],
	var = rep(c("_indinus_xw", "_indinub_xw", "_indinui_xw"),
		times = c(2, 3, 1))
)

## Build (or reload) `dat`, the stacked respondent-level extract.
## If data/vi_only.csv is absent, each wave's data/<wave>_indresp.sav is
## imported, the vi / isHE / hasPhD / w8 columns are derived, the waves are
## row-bound and the result is cached as CSV. Otherwise the cached CSV is read.
if (!file.exists("data/vi_only.csv")) {
	infiles <- list()
	holder <- list()
	for (l in letters[1:6]) {
		infile <- paste0("data/", l, "_indresp.sav")
		## The raw import is also stored in `infiles` at the wave's letter
		## position; nothing in this block reads it back.
		indresp <- infiles[[which(letters == l)]] <-  import(infile)

		### VI
		### If they are close to a particular party, choose that
		### if they are not, take VI
		## Concretely: use <wave>_vote4 where it holds a code in 1:13,
		## otherwise fall back to <wave>_vote3.
		votevar1 <- paste0(l, "_vote4")
		votevar2 <- paste0(l, "_vote3")
		indresp$vi <- ifelse(is.element(indresp[,votevar1], 1:13),
			indresp[,votevar1],
			indresp[,votevar2])

		## Any remaining value outside 1:13 is set to missing.
		indresp$vi[!is.element(indresp$vi, 1:13)] <- NA

		## Numeric party codes -> short party labels.
		indresp$vi <- dplyr::recode(indresp$vi,
			`1` = 'Cons',
			`2` = 'Lab',
			`3` = 'LDem',
			`4` = 'SNP',
			`5` = 'PC',
			`6` = 'Green',
			`7` = 'UU',
			`8` = 'SDLP',
			`9` = 'Alliance',
			`10` = 'DUP',
			`11` = 'SF',
			`12` = 'UKIP',
			`13` = 'BNP')

		### Occupation
		### 231 = College, university and higher education teaching professionals
		## isHE is 1 where the occupation code equals 231 and 0 elsewhere;
		## which() skips NA comparisons, so those rows stay 0.
		occ_var <- paste0(l, "_jbisco88_cc")
		keep <- which(indresp[,occ_var] == 231)
		indresp$isHE <- 0
		indresp$isHE[keep] <- 1

		### Education
		## hasPhD is 1 where <wave>_qfhigh_dv equals 1 and 0 elsewhere.
		## NOTE(review): code 1 is taken to mean a PhD here; confirm
		## against the codebook.
                educ_var <- paste0(l, "_qfhigh_dv")
                keep <- which(indresp[,educ_var] == 1)
                indresp$hasPhD <- 0
                indresp$hasPhD[keep] <- 1

		### Weighting var
		## Look up this wave's weight-column suffix in w8var.df, prefix it
		## with the wave letter, and copy that column to the common name w8.
		w8var <- match(l, w8var.df$l)
		w8var <- w8var.df$var[w8var]
		w8var <- paste0(l, w8var)
		indresp$w8 <- indresp[,w8var]
		## Keep a fixed set of columns; the wave-prefixed occupation column
		## is renamed so every wave has the same names for rbind.
		df <- indresp[,c("pidp", occ_var, "isHE", "hasPhD", "vi","w8")]
                names(df)[2] <- "occ"
		df$wave <- l
		holder[[which(letters == l)]] <- df
	}

	## Stack all waves and cache the result for later runs.
	dat <- do.call("rbind", holder)
	write.csv(dat, file = "data/vi_only.csv", row.names = FALSE)
} else {
	dat <- read.csv("data/vi_only.csv")
}

## Keep only respondents with a party recorded.
dat <- subset(dat, !is.na(vi))

## ----plotdf--------------------------------------------------------------
## Per-wave count of HE respondents (isHE == 1) with a party recorded, and
## the smallest / largest of those per-wave counts.
## `!is.na(x)` can never be NA, so no na.rm argument is needed (the original
## passed `na.rm = T`, which relies on the reassignable shorthand `T`).
HE_respondents <- with(
    subset(dat, isHE == 1),
    by(vi, wave, function(x) sum(!is.na(x)))
)

minHE <- min(HE_respondents)
maxHE <- max(HE_respondents)

## ----plotprep------------------------------------------------------------
## For every wave x isHE x party cell: weighted total (sum of w8) and
## unweighted row count, plus the wave x isHE totals they are divided by.
plot.df <- dat %>%
    group_by(wave, isHE, vi) %>%
    summarize(nResps = sum(w8),
              unWeightedResps = n()) %>%
    group_by(wave, isHE) %>%
    mutate(nSample = sum(nResps),
           nUnweighted = sum(unWeightedResps)) %>%
    ## Drop the leftover wave/isHE grouping so later operations on plot.df
    ## are not silently carried out per group.
    ungroup()

## Weighted share with its normal-approximation interval.
## NOTE(review): n is the sum of weights here, not a respondent count.
plot.df$proportion <- plot.df$nResps / plot.df$nSample
plot.df$lower <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "lower")
plot.df$upper <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "upper")

## Unweighted share with its interval, based on raw row counts.
plot.df$proportion.alt <- plot.df$unWeightedResps / plot.df$nUnweighted
plot.df$lower.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "lower")
plot.df$upper.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "upper")

## Only these six parties are levels; every other party label becomes NA.
plot.df$vi <- factor(plot.df$vi,
                     levels = c("Cons","Lab","UKIP","LDem","SNP","Green"),
                     ordered = TRUE)

## Fill palette (hex colours) passed, unnamed, to scale_fill_manual().
party.cols <- c(
    "#0087DC", "#D50000", "#70147A", "#FDBB30",
    "#FFFF00", "#008142", "#99CC33"
)

## ----plot, fig = TRUE, fig.cap = ""--------------------------------------
## Wave "f" only: dodged bars of the weighted share for six parties, one bar
## per isHE value (distinguished by alpha), with error bars from lower/upper.
ggplot(
    data = subset(
        plot.df,
        wave == "f" & vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")
    ),
    mapping = aes(
        x = vi, y = proportion,
        ymin = lower, ymax = upper,
        color = factor(isHE), fill = vi
    )
) +
    geom_bar(
        mapping = aes(alpha = factor(isHE)),
        stat = "identity",
        position = position_dodge(width = 0.9)
    ) +
    geom_errorbar(
        width = 0.25,
        position = position_dodge(width = 0.9)
    ) +
    scale_x_discrete(name = "Party closest to...") +
    scale_y_continuous(name = "Proportion", labels = percent) +
    scale_fill_manual(values = party.cols) +
    scale_alpha_manual(values = c(0.5, 1)) +
    scale_color_manual(values = c("black", "black")) +
    labs(
        title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
        subtitle = "Source: Understanding Society Wave 6"
    ) +
    theme_bw() +
    theme(legend.position = "none")

## ----plotbywave, fig = TRUE, fig.cap = ""--------------------------------
## Numeric wave index: position of the wave letter in `letters`.
plot.df[["Wave"]] <- charmatch(plot.df[["wave"]], letters)

## Same chart as the single-wave plot, drawn for every wave, one facet each.
ggplot(
    data = subset(
        plot.df,
        vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")
    ),
    mapping = aes(
        x = vi, y = proportion,
        ymin = lower, ymax = upper,
        color = factor(isHE), fill = vi
    )
) +
    geom_bar(
        mapping = aes(alpha = factor(isHE)),
        stat = "identity",
        position = position_dodge(width = 0.9)
    ) +
    geom_errorbar(
        width = 0.25,
        position = position_dodge(width = 0.9)
    ) +
    facet_wrap(~Wave) +
    scale_x_discrete(name = "Party closest to...") +
    scale_y_continuous(name = "Proportion", labels = percent) +
    scale_fill_manual(values = party.cols) +
    scale_alpha_manual(values = c(0.5, 1)) +
    scale_color_manual(values = c("black", "black")) +
    labs(
        title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
        subtitle = "Source: Understanding Society, various waves"
    ) +
    theme_bw() +
    theme(legend.position = "none")
	### NOTE: this file assumes you have Understanding Society data
	### in a folder called data/

	## ----loadlibs------------------------------------------------------------
	library(rio)
	library(dplyr)
	library(ggplot2)
	library(scales)
	## Normal-approximation (Wald) confidence bound for a binomial proportion.
	##
	## Arguments:
	##   p     - observed proportion(s); vectorised
	##   n     - sample size(s) behind each proportion
	##   alpha - significance level; the interval has nominal coverage 1 - alpha
	##   type  - "lower" or "upper": which end of the interval to return
	##
	## Returns p -/+ z * sqrt(p * (1 - p) / n) with z = qnorm(1 - alpha / 2).
	## The bound is not clamped to [0, 1].
	binom.ci <- function(p, n, alpha = 0.05, type = "lower") {
		## Reject unknown `type` values instead of silently treating anything
		## other than "lower" as a request for the upper bound.
		type <- match.arg(type, c("lower", "upper"))
		halfint <- qnorm(1 - alpha / 2) * sqrt(p * (1 - p) / n)
		if (type == "lower") {
			p - halfint
		} else {
			p + halfint
		}
	}


	## ----loaddata------------------------------------------------------------
	## Lookup table: wave letter -> suffix of that wave's weight column
	## (cross-sectional adult main interview weight).
	## Waves a-b use "_indinus_xw", waves c-e "_indinub_xw", wave f "_indinui_xw".
	w8var.df <- data.frame(
		l = letters[1:6],
		var = rep(c("_indinus_xw", "_indinub_xw", "_indinui_xw"),
			times = c(2, 3, 1))
	)

	## Build (or reload) `dat`, the stacked respondent-level extract.
	## If data/vi_only.csv is absent, each wave's data/<wave>_indresp.sav is
	## imported, the vi / isHE / hasPhD / w8 columns are derived, the waves are
	## row-bound and the result is cached as CSV. Otherwise the cached CSV is read.
	if (!file.exists("data/vi_only.csv")) {
	infiles <- list()
	holder <- list()
	for (l in letters[1:6]) {
	infile <- paste0("data/", l, "_indresp.sav")
	## The raw import is also stored in `infiles` at the wave's letter
	## position; nothing in this block reads it back.
	indresp <- infiles[[which(letters == l)]] <- import(infile)

	### VI
	### If they are close to a particular party, choose that
	### if they are not, take VI
	## Concretely: use <wave>_vote4 where it holds a code in 1:13,
	## otherwise fall back to <wave>_vote3.
	votevar1 <- paste0(l, "_vote4")
	votevar2 <- paste0(l, "_vote3")
	indresp$vi <- ifelse(is.element(indresp[,votevar1], 1:13),
	indresp[,votevar1],
	indresp[,votevar2])

	## Any remaining value outside 1:13 is set to missing.
	indresp$vi[!is.element(indresp$vi, 1:13)] <- NA

	## Numeric party codes -> short party labels.
	indresp$vi <- dplyr::recode(indresp$vi,
	`1` = 'Cons',
	`2` = 'Lab',
	`3` = 'LDem',
	`4` = 'SNP',
	`5` = 'PC',
	`6` = 'Green',
	`7` = 'UU',
	`8` = 'SDLP',
	`9` = 'Alliance',
	`10` = 'DUP',
	`11` = 'SF',
	`12` = 'UKIP',
	`13` = 'BNP')

	### Occupation
	### 231 = College, university and higher education teaching professionals
	## isHE is 1 where the occupation code equals 231 and 0 elsewhere;
	## which() skips NA comparisons, so those rows stay 0.
	occ_var <- paste0(l, "_jbisco88_cc")
	keep <- which(indresp[,occ_var] == 231)
	indresp$isHE <- 0
	indresp$isHE[keep] <- 1

	### Education
	## hasPhD is 1 where <wave>_qfhigh_dv equals 1 and 0 elsewhere.
	## NOTE(review): code 1 is taken to mean a PhD here; confirm
	## against the codebook.
	educ_var <- paste0(l, "_qfhigh_dv")
	keep <- which(indresp[,educ_var] == 1)
	indresp$hasPhD <- 0
	indresp$hasPhD[keep] <- 1

	### Weighting var
	## Look up this wave's weight-column suffix in w8var.df, prefix it
	## with the wave letter, and copy that column to the common name w8.
	w8var <- match(l, w8var.df$l)
	w8var <- w8var.df$var[w8var]
	w8var <- paste0(l, w8var)
	indresp$w8 <- indresp[,w8var]
	## Keep a fixed set of columns; the wave-prefixed occupation column
	## is renamed so every wave has the same names for rbind.
	df <- indresp[,c("pidp", occ_var, "isHE", "hasPhD", "vi","w8")]
	names(df)[2] <- "occ"
	df$wave <- l
	holder[[which(letters == l)]] <- df
	}

	## Stack all waves and cache the result for later runs.
	dat <- do.call("rbind", holder)
	write.csv(dat, file = "data/vi_only.csv", row.names = FALSE)
	} else {
	dat <- read.csv("data/vi_only.csv")
	}

	## Keep only respondents with a party recorded.
	dat <- subset(dat, !is.na(vi))

	## ----plotdf--------------------------------------------------------------
	## Per-wave count of HE respondents (isHE == 1) with a party recorded, and
	## the smallest / largest of those per-wave counts.
	## `!is.na(x)` can never be NA, so no na.rm argument is needed (the original
	## passed `na.rm = T`, which relies on the reassignable shorthand `T`).
	HE_respondents <- with(
	    subset(dat, isHE == 1),
	    by(vi, wave, function(x) sum(!is.na(x)))
	)

	minHE <- min(HE_respondents)
	maxHE <- max(HE_respondents)

	## ----plotprep------------------------------------------------------------
	## For every wave x isHE x party cell: weighted total (sum of w8) and
	## unweighted row count, plus the wave x isHE totals they are divided by.
	plot.df <- dat %>%
	    group_by(wave, isHE, vi) %>%
	    summarize(nResps = sum(w8),
	              unWeightedResps = n()) %>%
	    group_by(wave, isHE) %>%
	    mutate(nSample = sum(nResps),
	           nUnweighted = sum(unWeightedResps)) %>%
	    ## Drop the leftover wave/isHE grouping so later operations on plot.df
	    ## are not silently carried out per group.
	    ungroup()

	## Weighted share with its normal-approximation interval.
	## NOTE(review): n is the sum of weights here, not a respondent count.
	plot.df$proportion <- plot.df$nResps / plot.df$nSample
	plot.df$lower <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "lower")
	plot.df$upper <- binom.ci(plot.df$proportion, n = plot.df$nSample, type = "upper")

	## Unweighted share with its interval, based on raw row counts.
	plot.df$proportion.alt <- plot.df$unWeightedResps / plot.df$nUnweighted
	plot.df$lower.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "lower")
	plot.df$upper.alt <- binom.ci(plot.df$proportion.alt, n = plot.df$nUnweighted, type = "upper")

	## Only these six parties are levels; every other party label becomes NA.
	plot.df$vi <- factor(plot.df$vi,
	                     levels = c("Cons","Lab","UKIP","LDem","SNP","Green"),
	                     ordered = TRUE)

	## Fill palette (hex colours) passed, unnamed, to scale_fill_manual().
	party.cols <- c(
	    "#0087DC", "#D50000", "#70147A", "#FDBB30",
	    "#FFFF00", "#008142", "#99CC33"
	)

	## ----plot, fig = TRUE, fig.cap = ""--------------------------------------
	## Wave "f" only: dodged bars of the weighted share for six parties, one bar
	## per isHE value (distinguished by alpha), with error bars from lower/upper.
	ggplot(
	    data = subset(
	        plot.df,
	        wave == "f" & vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")
	    ),
	    mapping = aes(
	        x = vi, y = proportion,
	        ymin = lower, ymax = upper,
	        color = factor(isHE), fill = vi
	    )
	) +
	    geom_bar(
	        mapping = aes(alpha = factor(isHE)),
	        stat = "identity",
	        position = position_dodge(width = 0.9)
	    ) +
	    geom_errorbar(
	        width = 0.25,
	        position = position_dodge(width = 0.9)
	    ) +
	    scale_x_discrete(name = "Party closest to...") +
	    scale_y_continuous(name = "Proportion", labels = percent) +
	    scale_fill_manual(values = party.cols) +
	    scale_alpha_manual(values = c(0.5, 1)) +
	    scale_color_manual(values = c("black", "black")) +
	    labs(
	        title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
	        subtitle = "Source: Understanding Society Wave 6"
	    ) +
	    theme_bw() +
	    theme(legend.position = "none")

	## ----plotbywave, fig = TRUE, fig.cap = ""--------------------------------
	## Numeric wave index: position of the wave letter in `letters`.
	plot.df[["Wave"]] <- charmatch(plot.df[["wave"]], letters)

	## Same chart as the single-wave plot, drawn for every wave, one facet each.
	ggplot(
	    data = subset(
	        plot.df,
	        vi %in% c("Cons", "Lab", "UKIP", "LDem", "SNP", "Green")
	    ),
	    mapping = aes(
	        x = vi, y = proportion,
	        ymin = lower, ymax = upper,
	        color = factor(isHE), fill = vi
	    )
	) +
	    geom_bar(
	        mapping = aes(alpha = factor(isHE)),
	        stat = "identity",
	        position = position_dodge(width = 0.9)
	    ) +
	    geom_errorbar(
	        width = 0.25,
	        position = position_dodge(width = 0.9)
	    ) +
	    facet_wrap(~Wave) +
	    scale_x_discrete(name = "Party closest to...") +
	    scale_y_continuous(name = "Proportion", labels = percent) +
	    scale_fill_manual(values = party.cols) +
	    scale_alpha_manual(values = c(0.5, 1)) +
	    scale_color_manual(values = c("black", "black")) +
	    labs(
	        title = "Closest party, general population (lighter bars) and HE staff (darker bars)",
	        subtitle = "Source: Understanding Society, various waves"
	    ) +
	    theme_bw() +
	    theme(legend.position = "none")