Last active
August 29, 2015 14:21
-
-
Save franvillamil/c01f52dca640323947d1 to your computer and use it in GitHub Desktop.
Methods II - Homework 2 code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ===================================== # | |
# MACIS - Methods II # | |
# Homework 2 - May 15, 2015 # | |
# -------- # | |
# Francisco Villamil # | |
# ===================================== # | |
setwd("~/Dropbox/MACIS/Methods\ II/hw2") | |
lapply(c("foreign", "ggplot2", "MASS", "VGAM", "boot"), | |
library, character.only = TRUE) | |
data1 = read.dta("ame15hw2a.dta") | |
data2 = read.dta("ame15hw2b.dta") | |
set.seed(610632) | |
# ======================================================= | |
### 1. Binary Logit | |
# Prepare data, delete NAs | |
data1 = na.omit(data1) | |
data1 = droplevels(data1) | |
data1$fullterm = ifelse(data1$fullterm == "Yes", 1, 0) | |
## Full logit model, curvilinear effect for bargain | |
m1 = glm(fullterm ~ type + preference + bicameral + volatility + bargain + I(bargain^2) + country, | |
family = binomial(link = "logit"), data = data1) | |
summary(m1) | |
## Predicted probabilities for preference (from min to max value) | |
# New data | |
newdata.m1 = data.frame( | |
preference = seq(0,120,1), | |
type = rep("MWC", 121), | |
bicameral = rep("Yes", 121), | |
volatility = rep(mean(data1$volatility), 121), | |
bargain = rep(mean(data1$bargain), 121), | |
country = rep("Finland", 121) | |
) | |
# Probabilities and SEs | |
pp.m1 = data.frame( | |
preference = seq(0,120,1), | |
pp = predict(m1, newdata = newdata.m1, type = "response"), | |
UL = predict(m1, newdata = newdata.m1, type = "response") + | |
1.96 * predict(m1, newdata = newdata.m1, type = "response", se.fit = TRUE)$se.fit, | |
LL = predict(m1, newdata = newdata.m1, type = "response") - | |
1.96 * predict(m1, newdata = newdata.m1, type = "response", se.fit = TRUE)$se.fit | |
) | |
# Limit LL and UL for plotting | |
pp.m1$LL[pp.m1$LL<0] = 0 | |
pp.m1$UL[pp.m1$UL>1] = 1 | |
# Plotting | |
plot.pp.m1 = ggplot(pp.m1, aes(x = preference, y = pp)) + | |
geom_line() + geom_ribbon(aes(ymin = LL, ymax = UL), alpha = .25) + | |
theme_bw() + | |
xlab("\nPreference range in the coalition") + | |
ylab("Prob. of lasting for a full term\n") + | |
scale_y_continuous(limits = c(0,1), breaks = seq(0, 1, 0.2)) + | |
scale_x_continuous(limits = c(0,120), breaks = seq(0,120,20)) + | |
geom_point(data = subset(data1), aes(x = preference, y = fullterm), size = 2.5, shape = 73) | |
ggsave("pp-plot1.pdf", plot.pp.m1, width = 6, height = 6) | |
## Discrete change between 10th and 90th percentile using simulation | |
# Seed, and get 10000 betas | |
betas = mvrnorm(10000, coef(m1), vcov(m1)) | |
# Set values | |
values010 = c(1, 1, 0, mean(data1$preference), 1, mean(data1$volatility), | |
quantile(data1$bargain, 0.1), quantile(data1$bargain, 0.1)^2, | |
0, 0, 1, rep(0,22)) | |
values090 = c(1, 1, 0, mean(data1$preference), 1, mean(data1$volatility), | |
quantile(data1$bargain, 0.9), quantile(data1$bargain, 0.9)^2, | |
0, 0, 1, rep(0,22)) | |
# Get probabilities | |
pp010 = exp(betas %*% values010) / (1 + exp(betas %*% values010)) | |
pp090 = exp(betas %*% values090) / (1 + exp(betas %*% values090)) | |
# Mean, CIs, and difference | |
change = data.frame( | |
pp010 = c(mean(pp010), quantile(pp010, 0.975), quantile(pp010, 0.025)), | |
pp090 = c(mean(pp090), quantile(pp090, 0.975), quantile(pp090, 0.025)), | |
difference = c(mean(pp090 - pp010), quantile((pp090 - pp010), 0.975), quantile((pp090 - pp010), 0.025)) | |
) | |
# Comparison between models with and without bargain quadratic term | |
m1b = glm(fullterm ~ type + preference + bicameral + volatility + bargain + country, | |
family = binomial(link = "logit"), data = data1) | |
BIC(m1,m1b) | |
# ======================================================= | |
## 2. Ordinal Logit | |
# Prepare variables | |
data2 = na.omit(data2) | |
data2 = droplevels(data2) | |
summary(data2) | |
data2$educ = as.numeric(data2$educ) | |
data2$urban = as.numeric(data2$urban) | |
## Full model | |
m2 = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
data = data2, Hess = TRUE) | |
summary(m2) | |
## Predicted probabilities by left-right selfplacement, by bootstrapping | |
set.seed(234132) | |
# New data | |
newdata.m2 = data.frame( | |
urban = rep(mean(data2$urban), 101), | |
leftright = seq(0, 10, 0.1), | |
sex = rep("Male", 101), | |
birthyr = rep(mean(data2$birthyr), 101), | |
educ = rep(mean(data2$educ), 101), | |
immiback = rep("Dutch", 101) | |
) | |
# Probabilities for "Fully Disagree" response | |
boot.function1 = function(data, indices){ | |
data = data2[indices, ] | |
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
data = data, Hess = TRUE) | |
return(predict(fit, newdata.m2, type = "probs")[,1]) | |
} | |
boot1 = boot(data = data2, statistic = boot.function1, 1000) | |
boot.fullydis = data.frame( | |
mean = rep(NA,101), | |
UL = rep(NA,101), | |
LL = rep(NA,101)) | |
for (i in 1:101) { | |
bootci = boot.ci(boot1, index = i, type = "norm") | |
boot.fullydis[i, ] = c(bootci$t0, bootci$normal[2:3]) | |
} | |
boot.fullydis = cbind(boot.fullydis, response = rep("Fully Disagree", 101)) | |
# Probabilities for "Disagree" response | |
boot.function2 = function(data, indices){ | |
data = data2[indices, ] | |
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
data = data, Hess = TRUE) | |
return(predict(fit, newdata.m2, type = "probs")[,2]) | |
} | |
boot2 = boot(data = data2, statistic = boot.function2, 1000) | |
boot.dis = data.frame( | |
mean = rep(NA,101), | |
UL = rep(NA,101), | |
LL = rep(NA,101)) | |
for (i in 1:101) { | |
bootci = boot.ci(boot2, index = i, type = "norm") | |
boot.dis[i, ] = c(bootci$t0, bootci$normal[2:3]) | |
} | |
boot.dis = cbind(boot.dis, response = rep("Disagree", 101)) | |
# Probabilities for "Agree" response | |
boot.function3 = function(data, indices){ | |
data = data2[indices, ] | |
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
data = data, Hess = TRUE) | |
return(predict(fit, newdata.m2, type = "probs")[,3]) | |
} | |
boot3 = boot(data = data2, statistic = boot.function3, 1000) | |
boot.agree = data.frame( | |
mean = rep(NA,101), | |
UL = rep(NA,101), | |
LL = rep(NA,101)) | |
for (i in 1:101) { | |
bootci = boot.ci(boot3, index = i, type = "norm") | |
boot.agree[i, ] = c(bootci$t0, bootci$normal[2:3]) | |
} | |
boot.agree = cbind(boot.agree, response = rep("Agree", 101)) | |
# Probabilities for "Fully Agree" response | |
boot.function4 = function(data, indices){ | |
data = data2[indices, ] | |
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
data = data, Hess = TRUE) | |
return(predict(fit, newdata.m2, type = "probs")[,4]) | |
} | |
boot4 = boot(data = data2, statistic = boot.function4, 1000) | |
boot.fullyagree = data.frame( | |
mean = rep(NA,101), | |
UL = rep(NA,101), | |
LL = rep(NA,101)) | |
for (i in 1:101) { | |
bootci = boot.ci(boot4, index = i, type = "norm") | |
boot.fullyagree[i, ] = c(bootci$t0, bootci$normal[2:3]) | |
} | |
boot.fullyagree = cbind(boot.fullyagree, response = rep("Fully Agree", 101)) | |
# Put together all probabilities | |
boot.pp = rbind(boot.fullydis, boot.dis, boot.agree, boot.fullyagree) | |
boot.pp = cbind(boot.pp, id = rep(1:101, 4)) | |
# Plotting | |
plot.pp.m2.boot = ggplot(boot.pp, aes(x = id, y = mean, group = response)) + | |
geom_line(aes(color = response), size = 0.75) + | |
geom_ribbon(aes(ymin = LL, ymax = UL, fill = response), alpha = 0.25) + | |
#facet_wrap(~ response, ncol = 2) + | |
theme_bw() + | |
scale_x_continuous(limits = c(0,100), breaks = seq(0,100,20), | |
labels = c("Left","2","4","6","8","Right")) + | |
scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) + | |
theme(legend.title = element_blank()) + | |
xlab("\nLeft-right self-placement") + | |
ylab("Probability\n") | |
ggsave("pp-m2-boot.pdf", plot.pp.m2.boot, height = 5, width = 6) | |
## Predicted probabilities by immigration background, CIs by simulation | |
set.seed(2147129) | |
# 10000 Betas | |
betas = mvrnorm(10000, summary(m2)$coefficients[,1], vcov(m2)) | |
# Variable settings | |
v.dutch = c(mean(data2$urban), mean(data2$leftright), 1, | |
mean(data2$birthyr), mean(data2$educ), 0, 0) | |
v.west = c(mean(data2$urban), mean(data2$leftright), 1, | |
mean(data2$birthyr), mean(data2$educ), 1, 0) | |
v.nonwest = c(mean(data2$urban), mean(data2$leftright), 1, | |
mean(data2$birthyr), mean(data2$educ), 0, 1) | |
# Simulated probabilities for Dutch | |
p.dutch = data.frame( | |
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.dutch) )), | |
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.dutch) ))) - | |
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.dutch) ))), | |
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.dutch) ))) - | |
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.dutch) ))), | |
FullyAgree = 1 - | |
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.dutch) ))) | |
) | |
# Simulated probabilities for Western Alien | |
p.west = data.frame( | |
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.west) )), | |
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.west) ))) - | |
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.west) ))), | |
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.west) ))) - | |
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.west) ))), | |
FullyAgree = 1 - | |
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.west) ))) | |
) | |
# Simulated probabilities for Non-Western Alien | |
p.nonwest = data.frame( | |
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.nonwest) )), | |
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.nonwest) ))) - | |
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.nonwest) ))), | |
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.nonwest) ))) - | |
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.nonwest) ))), | |
FullyAgree = 1 - | |
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.nonwest) ))) | |
) | |
# Differences between Dutch/Western, Dutch/Non-Western, Western/Non-Western | |
diff.dutch.west = data.frame( | |
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"), | |
Mean.Dutch = c(mean(p.dutch$FullyDisagree), mean(p.dutch$Disagree), mean(p.dutch$Agree), mean(p.dutch$FullyAgree)), | |
dutchUL = c(quantile(p.dutch$FullyDisagree, 0.975), quantile(p.dutch$Disagree, 0.975), | |
quantile(p.dutch$Agree, 0.975), quantile(p.dutch$FullyAgree, 0.975)), | |
dutchLL = c(quantile(p.dutch$FullyDisagree, 0.025), quantile(p.dutch$Disagree, 0.025), | |
quantile(p.dutch$Agree, 0.025), quantile(p.dutch$FullyAgree, 0.025)), | |
Mean.Western = c(mean(p.west$FullyDisagree), mean(p.west$Disagree), mean(p.west$Agree), mean(p.west$FullyAgree)), | |
westernUL = c(quantile(p.west$FullyDisagree, 0.975), quantile(p.west$Disagree, 0.975), | |
quantile(p.west$Agree, 0.975), quantile(p.west$FullyAgree, 0.975)), | |
westernLL = c(quantile(p.west$FullyDisagree, 0.025), quantile(p.west$Disagree, 0.025), | |
quantile(p.west$Agree, 0.025), quantile(p.west$FullyAgree, 0.025)), | |
Difference = c(mean(p.dutch$FullyDisagree - p.west$FullyDisagree), | |
mean(p.dutch$Disagree - p.west$Disagree), | |
mean(p.dutch$Agree - p.west$Agree), | |
mean(p.dutch$FullyAgree - p.west$FullyAgree)), | |
diffUL = c(quantile((p.dutch$FullyDisagree - p.west$FullyDisagree), 0.975), | |
quantile((p.dutch$Disagree - p.west$Disagree), 0.975), | |
quantile((p.dutch$Agree - p.west$Agree), 0.975), | |
quantile((p.dutch$FullyAgree - p.west$FullyAgree), 0.975)), | |
diffLL = c(quantile((p.dutch$FullyDisagree - p.west$FullyDisagree), 0.025), | |
quantile((p.dutch$Disagree - p.west$Disagree), 0.025), | |
quantile((p.dutch$Agree - p.west$Agree), 0.025), | |
quantile((p.dutch$FullyAgree - p.west$FullyAgree), 0.025)) | |
) | |
diff.dutch.west[,2:10] = round(diff.dutch.west[,2:10],3) | |
# -------- | |
diff.dutch.nonwest = data.frame( | |
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"), | |
Mean.Dutch = c(mean(p.dutch$FullyDisagree), mean(p.dutch$Disagree), mean(p.dutch$Agree), mean(p.dutch$FullyAgree)), | |
dutchUL = c(quantile(p.dutch$FullyDisagree, 0.975), quantile(p.dutch$Disagree, 0.975), | |
quantile(p.dutch$Agree, 0.975), quantile(p.dutch$FullyAgree, 0.975)), | |
dutchLL = c(quantile(p.dutch$FullyDisagree, 0.025), quantile(p.dutch$Disagree, 0.025), | |
quantile(p.dutch$Agree, 0.025), quantile(p.dutch$FullyAgree, 0.025)), | |
Mean.NonWestern = c(mean(p.nonwest$FullyDisagree), mean(p.nonwest$Disagree), mean(p.nonwest$Agree), mean(p.nonwest$FullyAgree)), | |
nonWesternUL = c(quantile(p.nonwest$FullyDisagree, 0.975), quantile(p.nonwest$Disagree, 0.975), | |
quantile(p.nonwest$Agree, 0.975), quantile(p.nonwest$FullyAgree, 0.975)), | |
nonWesternLL = c(quantile(p.nonwest$FullyDisagree, 0.025), quantile(p.nonwest$Disagree, 0.025), | |
quantile(p.nonwest$Agree, 0.025), quantile(p.nonwest$FullyAgree, 0.025)), | |
Difference = c(mean(p.dutch$FullyDisagree - p.nonwest$FullyDisagree), | |
mean(p.dutch$Disagree - p.nonwest$Disagree), | |
mean(p.dutch$Agree - p.nonwest$Agree), | |
mean(p.dutch$FullyAgree - p.nonwest$FullyAgree)), | |
diffUL = c(quantile((p.dutch$FullyDisagree - p.nonwest$FullyDisagree), 0.975), | |
quantile((p.dutch$Disagree - p.nonwest$Disagree), 0.975), | |
quantile((p.dutch$Agree - p.nonwest$Agree), 0.975), | |
quantile((p.dutch$FullyAgree - p.nonwest$FullyAgree), 0.975)), | |
diffLL = c(quantile((p.dutch$FullyDisagree - p.nonwest$FullyDisagree), 0.025), | |
quantile((p.dutch$Disagree - p.nonwest$Disagree), 0.025), | |
quantile((p.dutch$Agree - p.nonwest$Agree), 0.025), | |
quantile((p.dutch$FullyAgree - p.nonwest$FullyAgree), 0.025)) | |
) | |
diff.dutch.nonwest[,2:10] = round(diff.dutch.nonwest[,2:10],3) | |
# -------- | |
diff.dutch.nonwest = data.frame( | |
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"), | |
Mean.Dutch = c(mean(p.west$FullyDisagree), mean(p.west$Disagree), mean(p.west$Agree), mean(p.west$FullyAgree)), | |
DutchUL = c(quantile(p.west$FullyDisagree, 0.975), quantile(p.west$Disagree, 0.975), | |
quantile(p.west$Agree, 0.975), quantile(p.west$FullyAgree, 0.975)), | |
DutchLL = c(quantile(p.west$FullyDisagree, 0.025), quantile(p.west$Disagree, 0.025), | |
quantile(p.west$Agree, 0.025), quantile(p.west$FullyAgree, 0.025)), | |
Mean.NonWestern = c(mean(p.nonwest$FullyDisagree), mean(p.nonwest$Disagree), mean(p.nonwest$Agree), mean(p.nonwest$FullyAgree)), | |
nonWesternUL = c(quantile(p.nonwest$FullyDisagree, 0.975), quantile(p.nonwest$Disagree, 0.975), | |
quantile(p.nonwest$Agree, 0.975), quantile(p.nonwest$FullyAgree, 0.975)), | |
nonWesternLL = c(quantile(p.nonwest$FullyDisagree, 0.025), quantile(p.nonwest$Disagree, 0.025), | |
quantile(p.nonwest$Agree, 0.025), quantile(p.nonwest$FullyAgree, 0.025)), | |
Difference = c(mean(p.west$FullyDisagree - p.nonwest$FullyDisagree), | |
mean(p.west$Disagree - p.nonwest$Disagree), | |
mean(p.west$Agree - p.nonwest$Agree), | |
mean(p.west$FullyAgree - p.nonwest$FullyAgree)), | |
diffUL = c(quantile((p.west$FullyDisagree - p.nonwest$FullyDisagree), 0.975), | |
quantile((p.west$Disagree - p.nonwest$Disagree), 0.975), | |
quantile((p.west$Agree - p.nonwest$Agree), 0.975), | |
quantile((p.west$FullyAgree - p.nonwest$FullyAgree), 0.975)), | |
diffLL = c(quantile((p.west$FullyDisagree - p.nonwest$FullyDisagree), 0.025), | |
quantile((p.west$Disagree - p.nonwest$Disagree), 0.025), | |
quantile((p.west$Agree - p.nonwest$Agree), 0.025), | |
quantile((p.west$FullyAgree - p.nonwest$FullyAgree), 0.025)) | |
) | |
diff.west.nonwest[,2:10] = round(diff.west.nonwest[,2:10],3) | |
## Cumulative odds for an 1-unit increase in education | |
exp(summary(m2)$coefficients[5, 1]) # = 0.6110813 | |
## Relaxing proportional odds assumption for urbanization | |
data2$muslim = ordered(data2$muslim) | |
m2b = vglm(muslim ~ urban + leftright + sex + birthyr + educ + immiback, | |
cumulative(parallel = FALSE ~ urban, reverse = TRUE), data = data2) | |
# Cumulative odds for urbanization with new model | |
exp(coefficients(m2b)[4]) # 0.825 for Pr(y > 1) / Pr(y =< 1) | |
exp(coefficients(m2b)[5]) # 0.956 for Pr(y > 2) / Pr(y =< 2) | |
exp(coefficients(m2b)[6]) # 1.066 for Pr(y > 3) / Pr(y =< 3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment