Skip to content

Instantly share code, notes, and snippets.

@franvillamil
Last active August 29, 2015 14:21
Show Gist options
  • Save franvillamil/c01f52dca640323947d1 to your computer and use it in GitHub Desktop.
Save franvillamil/c01f52dca640323947d1 to your computer and use it in GitHub Desktop.
Methods II - Homework 2 code
# ===================================== #
# MACIS - Methods II #
# Homework 2 - May 15, 2015 #
# -------- #
# Francisco Villamil #
# ===================================== #
setwd("~/Dropbox/MACIS/Methods\ II/hw2")
lapply(c("foreign", "ggplot2", "MASS", "VGAM", "boot"),
library, character.only = TRUE)
data1 = read.dta("ame15hw2a.dta")
data2 = read.dta("ame15hw2b.dta")
set.seed(610632)
# =======================================================
### 1. Binary Logit
# Prepare data, delete NAs
data1 = na.omit(data1)
data1 = droplevels(data1)
data1$fullterm = ifelse(data1$fullterm == "Yes", 1, 0)
## Full logit model, curvilinear effect for bargain
m1 = glm(fullterm ~ type + preference + bicameral + volatility + bargain + I(bargain^2) + country,
family = binomial(link = "logit"), data = data1)
summary(m1)
## Predicted probabilities for preference (from min to max value)
# New data
newdata.m1 = data.frame(
preference = seq(0,120,1),
type = rep("MWC", 121),
bicameral = rep("Yes", 121),
volatility = rep(mean(data1$volatility), 121),
bargain = rep(mean(data1$bargain), 121),
country = rep("Finland", 121)
)
# Probabilities and SEs
pp.m1 = data.frame(
preference = seq(0,120,1),
pp = predict(m1, newdata = newdata.m1, type = "response"),
UL = predict(m1, newdata = newdata.m1, type = "response") +
1.96 * predict(m1, newdata = newdata.m1, type = "response", se.fit = TRUE)$se.fit,
LL = predict(m1, newdata = newdata.m1, type = "response") -
1.96 * predict(m1, newdata = newdata.m1, type = "response", se.fit = TRUE)$se.fit
)
# Limit LL and UL for plotting
pp.m1$LL[pp.m1$LL<0] = 0
pp.m1$UL[pp.m1$UL>1] = 1
# Plotting
plot.pp.m1 = ggplot(pp.m1, aes(x = preference, y = pp)) +
geom_line() + geom_ribbon(aes(ymin = LL, ymax = UL), alpha = .25) +
theme_bw() +
xlab("\nPreference range in the coalition") +
ylab("Prob. of lasting for a full term\n") +
scale_y_continuous(limits = c(0,1), breaks = seq(0, 1, 0.2)) +
scale_x_continuous(limits = c(0,120), breaks = seq(0,120,20)) +
geom_point(data = subset(data1), aes(x = preference, y = fullterm), size = 2.5, shape = 73)
ggsave("pp-plot1.pdf", plot.pp.m1, width = 6, height = 6)
## Discrete change between 10th and 90th percentile using simulation
# Seed, and get 10000 betas
betas = mvrnorm(10000, coef(m1), vcov(m1))
# Set values
values010 = c(1, 1, 0, mean(data1$preference), 1, mean(data1$volatility),
quantile(data1$bargain, 0.1), quantile(data1$bargain, 0.1)^2,
0, 0, 1, rep(0,22))
values090 = c(1, 1, 0, mean(data1$preference), 1, mean(data1$volatility),
quantile(data1$bargain, 0.9), quantile(data1$bargain, 0.9)^2,
0, 0, 1, rep(0,22))
# Get probabilities
pp010 = exp(betas %*% values010) / (1 + exp(betas %*% values010))
pp090 = exp(betas %*% values090) / (1 + exp(betas %*% values090))
# Mean, CIs, and difference
change = data.frame(
pp010 = c(mean(pp010), quantile(pp010, 0.975), quantile(pp010, 0.025)),
pp090 = c(mean(pp090), quantile(pp090, 0.975), quantile(pp090, 0.025)),
difference = c(mean(pp090 - pp010), quantile((pp090 - pp010), 0.975), quantile((pp090 - pp010), 0.025))
)
# Comparison between models with and without bargain quadratic term
m1b = glm(fullterm ~ type + preference + bicameral + volatility + bargain + country,
family = binomial(link = "logit"), data = data1)
BIC(m1,m1b)
# =======================================================
## 2. Ordinal Logit
# Prepare variables
data2 = na.omit(data2)
data2 = droplevels(data2)
summary(data2)
data2$educ = as.numeric(data2$educ)
data2$urban = as.numeric(data2$urban)
## Full model
m2 = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
data = data2, Hess = TRUE)
summary(m2)
## Predicted probabilities by left-right selfplacement, by bootstrapping
set.seed(234132)
# New data
newdata.m2 = data.frame(
urban = rep(mean(data2$urban), 101),
leftright = seq(0, 10, 0.1),
sex = rep("Male", 101),
birthyr = rep(mean(data2$birthyr), 101),
educ = rep(mean(data2$educ), 101),
immiback = rep("Dutch", 101)
)
# Probabilities for "Fully Disagree" response
boot.function1 = function(data, indices){
data = data2[indices, ]
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
data = data, Hess = TRUE)
return(predict(fit, newdata.m2, type = "probs")[,1])
}
boot1 = boot(data = data2, statistic = boot.function1, 1000)
boot.fullydis = data.frame(
mean = rep(NA,101),
UL = rep(NA,101),
LL = rep(NA,101))
for (i in 1:101) {
bootci = boot.ci(boot1, index = i, type = "norm")
boot.fullydis[i, ] = c(bootci$t0, bootci$normal[2:3])
}
boot.fullydis = cbind(boot.fullydis, response = rep("Fully Disagree", 101))
# Probabilities for "Disagree" response
boot.function2 = function(data, indices){
data = data2[indices, ]
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
data = data, Hess = TRUE)
return(predict(fit, newdata.m2, type = "probs")[,2])
}
boot2 = boot(data = data2, statistic = boot.function2, 1000)
boot.dis = data.frame(
mean = rep(NA,101),
UL = rep(NA,101),
LL = rep(NA,101))
for (i in 1:101) {
bootci = boot.ci(boot2, index = i, type = "norm")
boot.dis[i, ] = c(bootci$t0, bootci$normal[2:3])
}
boot.dis = cbind(boot.dis, response = rep("Disagree", 101))
# Probabilities for "Agree" response
boot.function3 = function(data, indices){
data = data2[indices, ]
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
data = data, Hess = TRUE)
return(predict(fit, newdata.m2, type = "probs")[,3])
}
boot3 = boot(data = data2, statistic = boot.function3, 1000)
boot.agree = data.frame(
mean = rep(NA,101),
UL = rep(NA,101),
LL = rep(NA,101))
for (i in 1:101) {
bootci = boot.ci(boot3, index = i, type = "norm")
boot.agree[i, ] = c(bootci$t0, bootci$normal[2:3])
}
boot.agree = cbind(boot.agree, response = rep("Agree", 101))
# Probabilities for "Fully Agree" response
boot.function4 = function(data, indices){
data = data2[indices, ]
fit = polr(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
data = data, Hess = TRUE)
return(predict(fit, newdata.m2, type = "probs")[,4])
}
boot4 = boot(data = data2, statistic = boot.function4, 1000)
boot.fullyagree = data.frame(
mean = rep(NA,101),
UL = rep(NA,101),
LL = rep(NA,101))
for (i in 1:101) {
bootci = boot.ci(boot4, index = i, type = "norm")
boot.fullyagree[i, ] = c(bootci$t0, bootci$normal[2:3])
}
boot.fullyagree = cbind(boot.fullyagree, response = rep("Fully Agree", 101))
# Put together all probabilities
boot.pp = rbind(boot.fullydis, boot.dis, boot.agree, boot.fullyagree)
boot.pp = cbind(boot.pp, id = rep(1:101, 4))
# Plotting
plot.pp.m2.boot = ggplot(boot.pp, aes(x = id, y = mean, group = response)) +
geom_line(aes(color = response), size = 0.75) +
geom_ribbon(aes(ymin = LL, ymax = UL, fill = response), alpha = 0.25) +
#facet_wrap(~ response, ncol = 2) +
theme_bw() +
scale_x_continuous(limits = c(0,100), breaks = seq(0,100,20),
labels = c("Left","2","4","6","8","Right")) +
scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
theme(legend.title = element_blank()) +
xlab("\nLeft-right self-placement") +
ylab("Probability\n")
ggsave("pp-m2-boot.pdf", plot.pp.m2.boot, height = 5, width = 6)
## Predicted probabilities by immigration background, CIs by simulation
set.seed(2147129)
# 10000 Betas
betas = mvrnorm(10000, summary(m2)$coefficients[,1], vcov(m2))
# Variable settings
v.dutch = c(mean(data2$urban), mean(data2$leftright), 1,
mean(data2$birthyr), mean(data2$educ), 0, 0)
v.west = c(mean(data2$urban), mean(data2$leftright), 1,
mean(data2$birthyr), mean(data2$educ), 1, 0)
v.nonwest = c(mean(data2$urban), mean(data2$leftright), 1,
mean(data2$birthyr), mean(data2$educ), 0, 1)
# Simulated probabilities for Dutch
p.dutch = data.frame(
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.dutch) )),
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.dutch) ))) -
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.dutch) ))),
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.dutch) ))) -
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.dutch) ))),
FullyAgree = 1 -
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.dutch) )))
)
# Simulated probabilities for Western Alien
p.west = data.frame(
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.west) )),
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.west) ))) -
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.west) ))),
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.west) ))) -
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.west) ))),
FullyAgree = 1 -
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.west) )))
)
# Simulated probabilities for Non-Western Alien
p.nonwest = data.frame(
FullyDisagree = 1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.nonwest) )),
Disagree = (1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.nonwest) ))) -
(1 / (1 + exp( -(betas[, 8] - betas[,1:7] %*% v.nonwest) ))),
Agree = (1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.nonwest) ))) -
(1 / (1 + exp( -(betas[, 9] - betas[,1:7] %*% v.nonwest) ))),
FullyAgree = 1 -
(1 / (1 + exp( -(betas[, 10] - betas[,1:7] %*% v.nonwest) )))
)
# Differences between Dutch/Western, Dutch/Non-Western, Western/Non-Western
diff.dutch.west = data.frame(
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"),
Mean.Dutch = c(mean(p.dutch$FullyDisagree), mean(p.dutch$Disagree), mean(p.dutch$Agree), mean(p.dutch$FullyAgree)),
dutchUL = c(quantile(p.dutch$FullyDisagree, 0.975), quantile(p.dutch$Disagree, 0.975),
quantile(p.dutch$Agree, 0.975), quantile(p.dutch$FullyAgree, 0.975)),
dutchLL = c(quantile(p.dutch$FullyDisagree, 0.025), quantile(p.dutch$Disagree, 0.025),
quantile(p.dutch$Agree, 0.025), quantile(p.dutch$FullyAgree, 0.025)),
Mean.Western = c(mean(p.west$FullyDisagree), mean(p.west$Disagree), mean(p.west$Agree), mean(p.west$FullyAgree)),
westernUL = c(quantile(p.west$FullyDisagree, 0.975), quantile(p.west$Disagree, 0.975),
quantile(p.west$Agree, 0.975), quantile(p.west$FullyAgree, 0.975)),
westernLL = c(quantile(p.west$FullyDisagree, 0.025), quantile(p.west$Disagree, 0.025),
quantile(p.west$Agree, 0.025), quantile(p.west$FullyAgree, 0.025)),
Difference = c(mean(p.dutch$FullyDisagree - p.west$FullyDisagree),
mean(p.dutch$Disagree - p.west$Disagree),
mean(p.dutch$Agree - p.west$Agree),
mean(p.dutch$FullyAgree - p.west$FullyAgree)),
diffUL = c(quantile((p.dutch$FullyDisagree - p.west$FullyDisagree), 0.975),
quantile((p.dutch$Disagree - p.west$Disagree), 0.975),
quantile((p.dutch$Agree - p.west$Agree), 0.975),
quantile((p.dutch$FullyAgree - p.west$FullyAgree), 0.975)),
diffLL = c(quantile((p.dutch$FullyDisagree - p.west$FullyDisagree), 0.025),
quantile((p.dutch$Disagree - p.west$Disagree), 0.025),
quantile((p.dutch$Agree - p.west$Agree), 0.025),
quantile((p.dutch$FullyAgree - p.west$FullyAgree), 0.025))
)
diff.dutch.west[,2:10] = round(diff.dutch.west[,2:10],3)
# --------
diff.dutch.nonwest = data.frame(
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"),
Mean.Dutch = c(mean(p.dutch$FullyDisagree), mean(p.dutch$Disagree), mean(p.dutch$Agree), mean(p.dutch$FullyAgree)),
dutchUL = c(quantile(p.dutch$FullyDisagree, 0.975), quantile(p.dutch$Disagree, 0.975),
quantile(p.dutch$Agree, 0.975), quantile(p.dutch$FullyAgree, 0.975)),
dutchLL = c(quantile(p.dutch$FullyDisagree, 0.025), quantile(p.dutch$Disagree, 0.025),
quantile(p.dutch$Agree, 0.025), quantile(p.dutch$FullyAgree, 0.025)),
Mean.NonWestern = c(mean(p.nonwest$FullyDisagree), mean(p.nonwest$Disagree), mean(p.nonwest$Agree), mean(p.nonwest$FullyAgree)),
nonWesternUL = c(quantile(p.nonwest$FullyDisagree, 0.975), quantile(p.nonwest$Disagree, 0.975),
quantile(p.nonwest$Agree, 0.975), quantile(p.nonwest$FullyAgree, 0.975)),
nonWesternLL = c(quantile(p.nonwest$FullyDisagree, 0.025), quantile(p.nonwest$Disagree, 0.025),
quantile(p.nonwest$Agree, 0.025), quantile(p.nonwest$FullyAgree, 0.025)),
Difference = c(mean(p.dutch$FullyDisagree - p.nonwest$FullyDisagree),
mean(p.dutch$Disagree - p.nonwest$Disagree),
mean(p.dutch$Agree - p.nonwest$Agree),
mean(p.dutch$FullyAgree - p.nonwest$FullyAgree)),
diffUL = c(quantile((p.dutch$FullyDisagree - p.nonwest$FullyDisagree), 0.975),
quantile((p.dutch$Disagree - p.nonwest$Disagree), 0.975),
quantile((p.dutch$Agree - p.nonwest$Agree), 0.975),
quantile((p.dutch$FullyAgree - p.nonwest$FullyAgree), 0.975)),
diffLL = c(quantile((p.dutch$FullyDisagree - p.nonwest$FullyDisagree), 0.025),
quantile((p.dutch$Disagree - p.nonwest$Disagree), 0.025),
quantile((p.dutch$Agree - p.nonwest$Agree), 0.025),
quantile((p.dutch$FullyAgree - p.nonwest$FullyAgree), 0.025))
)
diff.dutch.nonwest[,2:10] = round(diff.dutch.nonwest[,2:10],3)
# --------
diff.dutch.nonwest = data.frame(
Response = c("Fully Disagree", "Disagree", "Agree", "Fully Agree"),
Mean.Dutch = c(mean(p.west$FullyDisagree), mean(p.west$Disagree), mean(p.west$Agree), mean(p.west$FullyAgree)),
DutchUL = c(quantile(p.west$FullyDisagree, 0.975), quantile(p.west$Disagree, 0.975),
quantile(p.west$Agree, 0.975), quantile(p.west$FullyAgree, 0.975)),
DutchLL = c(quantile(p.west$FullyDisagree, 0.025), quantile(p.west$Disagree, 0.025),
quantile(p.west$Agree, 0.025), quantile(p.west$FullyAgree, 0.025)),
Mean.NonWestern = c(mean(p.nonwest$FullyDisagree), mean(p.nonwest$Disagree), mean(p.nonwest$Agree), mean(p.nonwest$FullyAgree)),
nonWesternUL = c(quantile(p.nonwest$FullyDisagree, 0.975), quantile(p.nonwest$Disagree, 0.975),
quantile(p.nonwest$Agree, 0.975), quantile(p.nonwest$FullyAgree, 0.975)),
nonWesternLL = c(quantile(p.nonwest$FullyDisagree, 0.025), quantile(p.nonwest$Disagree, 0.025),
quantile(p.nonwest$Agree, 0.025), quantile(p.nonwest$FullyAgree, 0.025)),
Difference = c(mean(p.west$FullyDisagree - p.nonwest$FullyDisagree),
mean(p.west$Disagree - p.nonwest$Disagree),
mean(p.west$Agree - p.nonwest$Agree),
mean(p.west$FullyAgree - p.nonwest$FullyAgree)),
diffUL = c(quantile((p.west$FullyDisagree - p.nonwest$FullyDisagree), 0.975),
quantile((p.west$Disagree - p.nonwest$Disagree), 0.975),
quantile((p.west$Agree - p.nonwest$Agree), 0.975),
quantile((p.west$FullyAgree - p.nonwest$FullyAgree), 0.975)),
diffLL = c(quantile((p.west$FullyDisagree - p.nonwest$FullyDisagree), 0.025),
quantile((p.west$Disagree - p.nonwest$Disagree), 0.025),
quantile((p.west$Agree - p.nonwest$Agree), 0.025),
quantile((p.west$FullyAgree - p.nonwest$FullyAgree), 0.025))
)
diff.west.nonwest[,2:10] = round(diff.west.nonwest[,2:10],3)
## Cumulative odds for an 1-unit increase in education
exp(summary(m2)$coefficients[5, 1]) # = 0.6110813
## Relaxing proportional odds assumption for urbanization
data2$muslim = ordered(data2$muslim)
m2b = vglm(muslim ~ urban + leftright + sex + birthyr + educ + immiback,
cumulative(parallel = FALSE ~ urban, reverse = TRUE), data = data2)
# Cumulative odds for urbanization with new model
exp(coefficients(m2b)[4]) # 0.825 for Pr(y > 1) / Pr(y =< 1)
exp(coefficients(m2b)[5]) # 0.956 for Pr(y > 2) / Pr(y =< 2)
exp(coefficients(m2b)[6]) # 1.066 for Pr(y > 3) / Pr(y =< 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment