franvillamil/methods-hw1.R

## methods-hw1.R
# ============================== #
# 	MACIS - Methods II	 #
#    Homework 1 - May 1, 2015	 #
# 	------------------	 #
#	Francisco Villamil	 #
# ============================== #

setwd("...")

lapply(c("rms", "lme4", "ggplot2", "foreign", "aod"),
	library, character.only = TRUE)

data = read.dta("ame15hw1.dta")

# 1. Regression
# =============

# Full model with interactions (class * competence)
ptv.model.int = glm(formula = ptv_con ~
	leftright + middle * valence + working * valence + female + age,
	data = data, family = gaussian(link = "identity"))
summary(ptv.model.int)

# Wald tests; all predictors & only interaction terms
wald.test(b = coef(ptv.model.int),
	Sigma = vcov(ptv.model.int),
	Terms = 2:9)

wald.test(b = coef(ptv.model.int),
	Sigma = vcov(ptv.model.int),
	Terms = 8:9)

# Effect of competence
newdata = data.frame(
	valence = rep(c(-1,0,1), 3),
	middle = 	c(0,0,0,0,0,0,1,1,1),
	working = 	c(0,0,0,1,1,1,0,0,0),
	leftright = rep(median(data$leftright, na.rm = TRUE),9),
	age = rep(median(data$age, na.rm = TRUE),9),
	female = rep(median(data$female, na.rm = TRUE),9))

pp = data.frame(
	valence = rep(c("Not capable", "Neither", "Capable"), 3),
	class = c(rep("Neither", 3),
			rep("Working class", 3),
			rep("Middle class", 3)),
	pp = predict(ptv.model.int, newdata = newdata, type = "response"),
	pp.ll = predict(ptv.model.int, newdata = newdata, type = "response") -
		1.96 * predict(ptv.model.int, newdata = newdata, type = "response", se.fit = TRUE)$se.fit,
	pp.ul = predict(ptv.model.int, newdata = newdata, type = "response") +
		1.96 * predict(ptv.model.int, newdata = newdata, type = "response", se.fit = TRUE)$se.fit)

interaction.plot = ggplot(pp, aes(x = class, y = pp, color = valence)) +
	geom_point() +
	geom_segment(aes(x = class, xend = class, y = pp.ll, yend = pp.ul), alpha = 0.5, size = 1.5) +
	theme_bw() +
	theme(legend.position = "right") +
	xlab("Social class\nauto-identification\n") + ylab("PVT Conservative") +
	labs(color = "Is Labour a capable\nor not capable\ngovernment?\n") +
	scale_y_continuous(limits = c(0,10),breaks = c(0,2,4,6,8,10),
		labels = c("Very\nunlikely", seq(2,8,2), "Very\nlikely")) +
	coord_flip()
ggsave("interaction.pdf", interaction.plot, height = 3, width = 8.5)
rm(newdata, pp)

# Effect of competence II: simple slope analysis
workclass.coef = coef(ptv.model.int)[4] + coef(ptv.model.int)[9]
workclass.var = vcov(ptv.model.int)[4,4] + vcov(ptv.model.int)[9,9] + vcov(ptv.model.int)[4,9]
workclass.t = workclass.coef / sqrt(workclass.var)
2 * (1 - pt(abs(workclass.t), df.residual(ptv.model.int))) # P-value = 1.79e-04

midclass.coef = coef(ptv.model.int)[4] + coef(ptv.model.int)[8]
midclass.var = vcov(ptv.model.int)[4,4] + vcov(ptv.model.int)[8,8] + vcov(ptv.model.int)[4,8]
midclass.t = midclass.coef / sqrt(midclass.var)
2 * (1 - pt(abs(midclass.t), df.residual(ptv.model.int))) # P-value = 5.13e-07

noclass.coef = coef(ptv.model.int)[4]
noclass.var = vcov(ptv.model.int)[4,4]
noclass.t = noclass.coef / sqrt(noclass.var)
2 * (1 - pt(abs(noclass.t), df.residual(ptv.model.int))) # P-value = 6.47e-13

# Model without interactions
ptv.model = glm(formula = ptv_con ~
	leftright + valence + middle + working + female + age,
	data = data, family = gaussian(link = "identity"))

# BIC
BIC(ptv.model, ptv.model.int)


# 2. Huber sandwich estimators
# ============================

# Robust standard errors (Huber sandwich estimators)
model = ols(ptv_con ~
	leftright + middle * valence + working * valence + female + age,
	data = data, y = TRUE, x = TRUE)
rob = robcov(model, data$seatname)

coefs = data.frame(
	coef = c(rob$coefficients, ptv.model.int$coefficients),
	se = c(0.3724, 0.0513, 0.2575, 0.1593, 0.2789, 0.1719, 0.0050, 0.2538, 0.2820,
			0.3779, 0.0437, 0.3134, 0.1580, 0.2749, 0.1683, 0.0052, 0.3197, 0.2772),
	model = rep(c("Robust SE", "Normal SE"), each = 9),
	labels = c(1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1,
		0.9, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9, 8.9)
	)

coefs = cbind(coefs,
	UL = coefs$coef + (1.96*coefs$se),
	LL = coefs$coef - (1.96*coefs$se))

rob.coefs = ggplot(coefs, aes(x = coef, y = labels, color = model)) +
	geom_point() +
	geom_segment(aes(x = LL, xend = UL, y = labels, yend = labels), alpha = 1, size = 0.5) +
	xlab("\nCoefficient") + ylab("") +
	scale_y_continuous(breaks = c(1:9),
		labels = c("(Intercept)", "leftright", "middle", "valence",
			"working", "female", "age", "middle:valence", "working:valence")) +
	theme_bw() +
	theme(legend.title = element_blank())
ggsave("rob-coefs.pdf", rob.coefs, height = 4, width = 6)


# (Robust) predicted values for competence and middle class (all other variable at median value)
rob.preds = Predict(rob,
	valence = c(-1,0,1),
	working = 0,
	middle = 1,
	female = 1,
	age = 50,
	leftright = 5)

rob.prediction = ggplot(rob.preds, aes(x = yhat, y = valence)) +
	geom_point() +
	geom_segment(aes(x = lower, xend = upper, y = valence, yend = valence), alpha = 1, size = 0.5) +
	theme_bw() +
	xlab("\nPredicted PTV Conservatives") + ylab("Is Labout a capable or\nnot capable government?\n") +
	scale_y_continuous(breaks = c(-1,0,1), labels = c("Not capable", "Neither", "Capable"))
ggsave("rob-competence.pdf", rob.prediction, height = 3, width = 6)

# Simple slope analysis, robust SEs
rob.midclass.coef = coef(rob)[4] + coef(rob)[8]
rob.midclass.var = vcov(rob)[4,4] + vcov(rob)[8,8] + vcov(rob)[4,8]
rob.midclass.t = rob.midclass.coef / sqrt(rob.midclass.var)
2 * (1 - pt(abs(rob.midclass.t), df.residual(rob))) # P-value = 3.63e-09

# 3. Hierarchical linear models
# =============================

# Random effects ANOVA

hlm0 = lmer(ptv_con ~ (1 | seatname),
    data = data, REML = FALSE)
summary(hlm0)


# Random intercept model, and BIC comparison with ptv.model

hlm1 = lmer(ptv_con ~ (1 | seatname) +
	leftright + middle + working + valence + female + age,
    data = data)

summary(hlm1)
BIC(hlm1, ptv.model)

# Random intercept and random slopes model

hlm2 = lmer(ptv_con ~ (1 + female | seatname) +
	leftright + middle + working + valence + age,
    data = data)

summary(hlm2)
BIC(hlm2, hlm1)
	# ============================== #
	# MACIS - Methods II #
	# Homework 1 - May 1, 2015 #
	# ------------------ #
	# Francisco Villamil #
	# ============================== #

	setwd("...")

	lapply(c("rms", "lme4", "ggplot2", "foreign", "aod"),
	library, character.only = TRUE)

	data = read.dta("ame15hw1.dta")

	# 1. Regression
	# =============

	# Full model with interactions (class * competence)
	ptv.model.int = glm(formula = ptv_con ~
	leftright + middle * valence + working * valence + female + age,
	data = data, family = gaussian(link = "identity"))
	summary(ptv.model.int)

	# Wald tests; all predictors & only interaction terms
	wald.test(b = coef(ptv.model.int),
	Sigma = vcov(ptv.model.int),
	Terms = 2:9)

	wald.test(b = coef(ptv.model.int),
	Sigma = vcov(ptv.model.int),
	Terms = 8:9)

	# Effect of competence
	newdata = data.frame(
	valence = rep(c(-1,0,1), 3),
	middle = c(0,0,0,0,0,0,1,1,1),
	working = c(0,0,0,1,1,1,0,0,0),
	leftright = rep(median(data$leftright, na.rm = TRUE),9),
	age = rep(median(data$age, na.rm = TRUE),9),
	female = rep(median(data$female, na.rm = TRUE),9))

	pp = data.frame(
	valence = rep(c("Not capable", "Neither", "Capable"), 3),
	class = c(rep("Neither", 3),
	rep("Working class", 3),
	rep("Middle class", 3)),
	pp = predict(ptv.model.int, newdata = newdata, type = "response"),
	pp.ll = predict(ptv.model.int, newdata = newdata, type = "response") -
	1.96 * predict(ptv.model.int, newdata = newdata, type = "response", se.fit = TRUE)$se.fit,
	pp.ul = predict(ptv.model.int, newdata = newdata, type = "response") +
	1.96 * predict(ptv.model.int, newdata = newdata, type = "response", se.fit = TRUE)$se.fit)

	interaction.plot = ggplot(pp, aes(x = class, y = pp, color = valence)) +
	geom_point() +
	geom_segment(aes(x = class, xend = class, y = pp.ll, yend = pp.ul), alpha = 0.5, size = 1.5) +
	theme_bw() +
	theme(legend.position = "right") +
	xlab("Social class\nauto-identification\n") + ylab("PVT Conservative") +
	labs(color = "Is Labour a capable\nor not capable\ngovernment?\n") +
	scale_y_continuous(limits = c(0,10),breaks = c(0,2,4,6,8,10),
	labels = c("Very\nunlikely", seq(2,8,2), "Very\nlikely")) +
	coord_flip()
	ggsave("interaction.pdf", interaction.plot, height = 3, width = 8.5)
	rm(newdata, pp)

	# Effect of competence II: simple slope analysis
	workclass.coef = coef(ptv.model.int)[4] + coef(ptv.model.int)[9]
	workclass.var = vcov(ptv.model.int)[4,4] + vcov(ptv.model.int)[9,9] + vcov(ptv.model.int)[4,9]
	workclass.t = workclass.coef / sqrt(workclass.var)
	2 * (1 - pt(abs(workclass.t), df.residual(ptv.model.int))) # P-value = 1.79e-04

	midclass.coef = coef(ptv.model.int)[4] + coef(ptv.model.int)[8]
	midclass.var = vcov(ptv.model.int)[4,4] + vcov(ptv.model.int)[8,8] + vcov(ptv.model.int)[4,8]
	midclass.t = midclass.coef / sqrt(midclass.var)
	2 * (1 - pt(abs(midclass.t), df.residual(ptv.model.int))) # P-value = 5.13e-07

	noclass.coef = coef(ptv.model.int)[4]
	noclass.var = vcov(ptv.model.int)[4,4]
	noclass.t = noclass.coef / sqrt(noclass.var)
	2 * (1 - pt(abs(noclass.t), df.residual(ptv.model.int))) # P-value = 6.47e-13

	# Model without interactions
	ptv.model = glm(formula = ptv_con ~
	leftright + valence + middle + working + female + age,
	data = data, family = gaussian(link = "identity"))

	# BIC
	BIC(ptv.model, ptv.model.int)


	# 2. Huber sandwich estimators
	# ============================

	# Robust standard errors (Huber sandwich estimators)
	model = ols(ptv_con ~
	leftright + middle * valence + working * valence + female + age,
	data = data, y = TRUE, x = TRUE)
	rob = robcov(model, data$seatname)

	coefs = data.frame(
	coef = c(rob$coefficients, ptv.model.int$coefficients),
	se = c(0.3724, 0.0513, 0.2575, 0.1593, 0.2789, 0.1719, 0.0050, 0.2538, 0.2820,
	0.3779, 0.0437, 0.3134, 0.1580, 0.2749, 0.1683, 0.0052, 0.3197, 0.2772),
	model = rep(c("Robust SE", "Normal SE"), each = 9),
	labels = c(1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1,
	0.9, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9, 8.9)
	)

	coefs = cbind(coefs,
	UL = coefs$coef + (1.96*coefs$se),
	LL = coefs$coef - (1.96*coefs$se))

	rob.coefs = ggplot(coefs, aes(x = coef, y = labels, color = model)) +
	geom_point() +
	geom_segment(aes(x = LL, xend = UL, y = labels, yend = labels), alpha = 1, size = 0.5) +
	xlab("\nCoefficient") + ylab("") +
	scale_y_continuous(breaks = c(1:9),
	labels = c("(Intercept)", "leftright", "middle", "valence",
	"working", "female", "age", "middle:valence", "working:valence")) +
	theme_bw() +
	theme(legend.title = element_blank())
	ggsave("rob-coefs.pdf", rob.coefs, height = 4, width = 6)


	# (Robust) predicted values for competence and middle class (all other variable at median value)
	rob.preds = Predict(rob,
	valence = c(-1,0,1),
	working = 0,
	middle = 1,
	female = 1,
	age = 50,
	leftright = 5)

	rob.prediction = ggplot(rob.preds, aes(x = yhat, y = valence)) +
	geom_point() +
	geom_segment(aes(x = lower, xend = upper, y = valence, yend = valence), alpha = 1, size = 0.5) +
	theme_bw() +
	xlab("\nPredicted PTV Conservatives") + ylab("Is Labout a capable or\nnot capable government?\n") +
	scale_y_continuous(breaks = c(-1,0,1), labels = c("Not capable", "Neither", "Capable"))
	ggsave("rob-competence.pdf", rob.prediction, height = 3, width = 6)

	# Simple slope analysis, robust SEs
	rob.midclass.coef = coef(rob)[4] + coef(rob)[8]
	rob.midclass.var = vcov(rob)[4,4] + vcov(rob)[8,8] + vcov(rob)[4,8]
	rob.midclass.t = rob.midclass.coef / sqrt(rob.midclass.var)
	2 * (1 - pt(abs(rob.midclass.t), df.residual(rob))) # P-value = 3.63e-09

	# 3. Hierarchical linear models
	# =============================

	# Random effects ANOVA

	hlm0 = lmer(ptv_con ~ (1 \| seatname),
	data = data, REML = FALSE)
	summary(hlm0)


	# Random intercept model, and BIC comparison with ptv.model

	hlm1 = lmer(ptv_con ~ (1 \| seatname) +
	leftright + middle + working + valence + female + age,
	data = data)

	summary(hlm1)
	BIC(hlm1, ptv.model)

	# Random intercept and random slopes model

	hlm2 = lmer(ptv_con ~ (1 + female \| seatname) +
	leftright + middle + working + valence + age,
	data = data)

	summary(hlm2)
	BIC(hlm2, hlm1)