Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Code used to do the simulations explained on my paper.
##### here I re-run Bollen & Paxton's regressions, but with simulated factor scores
library(MASS)
library(sem)
setwd(basedir)
bolpaxdata <- read.csv("~/Desktop/SCpaper/bollenpaxtondata.csv") # change path as needed
set.seed(10101957) # set seed to make exercise replicable
##### part 0: preliminary stuff (transform variables, specify CFA model, create subroutines)
### transform variables
bolpaxdata$coverage <- bolpaxdata$nyt + bolpaxdata$vanderbilt + bolpaxdata$facts
bolpaxdata$age <- 1980 - bolpaxdata$independence
bolpaxdata$coups <- bolpaxdata$coups76 + bolpaxdata$coups77 + bolpaxdata$coups78 + bolpaxdata$coups79 + bolpaxdata$coups80
bolpaxdata$radiostvs <- bolpaxdata$radios + bolpaxdata$tvs
bolpaxdata$coverage[bolpaxdata$coverage==0] <- .001
bolpaxdata$age[bolpaxdata$age<1] <- NA
bolpaxdata$lcoverage <- log(bolpaxdata$coverage)
bolpaxdata$lage <- log(bolpaxdata$age)
bolpaxdata$lradiostvs <- log(bolpaxdata$radiostvs)
bolpaxdata$lpopulation <- log(bolpaxdata$population)
bolpaxdata$larea <- log(bolpaxdata$area)
bolpaxdata$lenergy <- log(bolpaxdata$energy)
bolpaxdata$lprotests <- log(bolpaxdata$protests)
bolpaxdata$lstrikes <- log(bolpaxdata$strikes)
bolpaxdata$lriots <- log(bolpaxdata$riots)
### specify CFA model
model.cfadata <- specifyModel(quiet=TRUE)
edemoc -> indicator1, lambda_edemoc_indicator1, 69.87
edemoc -> indicator2, lambda_edemoc_indicator2, 68.57
edemoc -> indicator3, lambda_edemoc_indicator3, 53.45
edemoc -> indicator4, lambda_edemoc_indicator4, 28.21
edemoc -> indicator5, lambda_edemoc_indicator5, 40.63
edemoc -> indicator6, lambda_edemoc_indicator6, 97.69
edemoc -> indicator7, lambda_edemoc_indicator7, 21.17
edemoc -> indicator8, lambda_edemoc_indicator8, 51.63
edemoc -> indicator9, lambda_edemoc_indicator9, 26.09
edemoc -> indicator10, lambda_edemoc_indicator10, 55.01
edemoc -> indicator11, lambda_edemoc_indicator11, 63.13
edemoc -> indicator12, lambda_edemoc_indicator12, 15.67
erater1 -> indicator1, lambda_erater1_indicator1, 14.12
erater1 -> indicator2, lambda_erater1_indicator2, 6.71
erater1 -> indicator3, lambda_erater1_indicator3, 18.31
erater1 -> indicator4, lambda_erater1_indicator4, 31.81
erater1 -> indicator5, lambda_erater1_indicator5, 95.69
erater1 -> indicator6, lambda_erater1_indicator6, 38.13
erater2 -> indicator7, lambda_erater2_indicator7, 70.70
erater2 -> indicator8, lambda_erater2_indicator8, 31.51
erater2 -> indicator9, lambda_erater2_indicator9, 90.83
erater2 -> indicator10, lambda_erater2_indicator10, 12.99
erater2 -> indicator11, lambda_erater2_indicator11, 53.06
erater2 -> indicator12, lambda_erater2_indicator12, 52.19
edemoc <-> edemoc, NA, 33.33
erater1 <-> erater1, NA, 25
erater2 <-> erater2, NA, 225
edemoc <-> erater1, NA, 0
edemoc <-> erater2, NA, 0
erater1 <-> erater2, NA, 0
indicator1 <-> indicator1, NA, 673531.2803
indicator2 <-> indicator2, NA, 17596739.13
indicator3 <-> indicator3, NA, 46940.98728
indicator4 <-> indicator4, NA, 10422468.383
indicator5 <-> indicator5, NA, 3192325.784
indicator6 <-> indicator6, NA, 700590.022
indicator7 <-> indicator7, NA, 1976572.886
indicator8 <-> indicator8, NA, 9677149.536
indicator9 <-> indicator9, NA, 1305505.098
indicator10 <-> indicator10, NA, 13204.38979
indicator11 <-> indicator11, NA, 581054.330
indicator12 <-> indicator12, NA, 14681009.32
### create subroutines
sub1 <- function(n) { # create empty objects to store estimates
coefs1 <<- vector(length=n)
sds1 <<- vector(length=n)
ts1 <<- vector(length=n)
ps1 <<- vector(length=n)
coefs2 <<- vector(length=n)
sds2 <<- vector(length=n)
ts2 <<- vector(length=n)
ps2 <<- vector(length=n)
}
sub2 <- function() { # generate simulated factors
democ <<- runif(n=186, min=0, max=20)
rater1 <<- rnorm(n=186, mean=5, sd=5)
rater2 <<- rnorm(n=186, mean=5, sd=15)
factors <<- cbind(democ, rater1, rater2)
fulldata <<- cbind(bolpaxdata, factors)
}
sub3 <- function() { # generate simulated errors
error1 <<- rnorm(n=186, mean=0, sd=827) + rbeta(n=186, shape1=0.68, shape2=0.78)
error2 <<- rnorm(n=186, mean=0, sd=4188) + rbeta(n=186, shape1=0.10, shape2=0.72)
error3 <<- rnorm(n=186, mean=0, sd=228) + rbeta(n=186, shape1=0.58, shape2=0.67)
error4 <<- rnorm(n=186, mean=0, sd=3237) + rbeta(n=186, shape1=0.50, shape2=0.60)
error5 <<- rnorm(n=186, mean=0, sd=1965) + rbeta(n=186, shape1=0.06, shape2=0.83)
error6 <<- rnorm(n=186, mean=0, sd=734) + rbeta(n=186, shape1=0.15, shape2=0.86)
error7 <<- rnorm(n=186, mean=0, sd=1439) + rbeta(n=186, shape1=0.51, shape2=0.46)
error8 <<- rnorm(n=186, mean=0, sd=2983) + rbeta(n=186, shape1=0.23, shape2=0.40)
error9 <<- rnorm(n=186, mean=0, sd=1190) + rbeta(n=186, shape1=0.73, shape2=0.21)
error10 <<- rnorm(n=186, mean=0, sd=112) + rbeta(n=186, shape1=0.26, shape2=0.63)
error11 <<- rnorm(n=186, mean=0, sd=806) + rbeta(n=186, shape1=0.54, shape2=0.37)
error12 <<- rnorm(n=186, mean=0, sd=4299) + rbeta(n=186, shape1=0.13, shape2=0.46)
errors <<- cbind(error1, error2, error3, error4, error5, error6, error7, error8, error9, error10, error11, error12)
fulldata <<- cbind(fulldata, errors)
}
sub4 <- function(prec=1, ...) { # generate simulated indicators
indicator1 <<- (14.12 * fulldata$rater1) + (69.87 * democ) + error1/prec
indicator2 <<- (6.71 * fulldata$rater1) + (68.57 * democ) + error2/prec
indicator3 <<- (18.31 * fulldata$rater1) + (53.45 * democ) + error3/prec
indicator4 <<- (31.81 * fulldata$rater1) + (28.21 * democ) + error4/prec
indicator5 <<- (95.69 * fulldata$rater1) + (40.63 * democ) + error5/prec
indicator6 <<- (38.13 * fulldata$rater1) + (97.69 * democ) + error6/prec
indicator7 <<- (70.70 * fulldata$rater2) + (21.17 * democ) + error7/prec
indicator8 <<- (31.51 * fulldata$rater2) + (51.63 * democ) + error8/prec
indicator9 <<- (90.83 * fulldata$rater2) + (26.09 * democ) + error9/prec
indicator10 <<- (12.99 * fulldata$rater2) + (55.01 * democ) + error10/prec
indicator11 <<- (53.06 * fulldata$rater2) + (63.13 * democ) + error11/prec
indicator12 <<- (52.19 * fulldata$rater2) + (15.67 * democ) + error12/prec
indicators <<- cbind(indicator1, indicator2, indicator3, indicator4, indicator5, indicator6, indicator7, indicator8, indicator9, indicator10, indicator11, indicator12)
fulldata <<- cbind(fulldata, indicators)
}
sub5 <- function() { # run CFA model
cfadata <<- fulldata[c(55:66)]
cfadata.cov <<- cov(cfadata)
cfadata.sem <<- sem(model.cfadata, cfadata.cov, nrow(cfadata))
fulldata <<- cbind(fulldata, fscores(cfadata.sem,data=cfadata))
}
sub6 <- function() { # regress factor scores on country-level variables
factor1reg <<- lm(fulldata$erater1 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs)
factor2reg <<- lm(fulldata$erater2 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs)
}
sub7 <- function() { # collect estimates
coefs1 <<- coefs1
sds1 <<- sds1
ts1 <<- ts1
ps1 <<- ps1
coefs2 <<- coefs2
sds2 <<- sds2
ts2 <<- ts2
ps2 <<- ps2
estimates <<- cbind(coefs1, sds1, ts1, ps1, coefs2, sds2, ts2, ps2)
}
### set bias and precision levels
## set bias levels
bias1 = c(20, 15, 10, 7, 5, 3, 1, 0.5, 0)
bias2 = append(rep(c(0.025), 8), 0)
bias3 = c(1, 1.3, 1.5, 1.7, 2.3, 2.5, 2.7, 2.9, 3.3)
bias4 = c(1, 1.35, 1.55, 1.75, 2.35, 2.55, 2.75, 2.95, 3.35)
## set precision levels
precs = c(1, 0.001, 1000)
##### part 1: marxism
### define master function
marxfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (additive)
for(i in 1:nrow(fulldata)) {
if(fulldata$marxist[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$marxist[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[2,1]
sds1[t] <<- summary(factor1reg)$coef[2,2]
ts1[t] <<- summary(factor1reg)$coef[2,3]
ps1[t] <<- summary(factor1reg)$coef[2,4]
coefs2[t] <<- summary(factor2reg)$coef[2,1]
sds2[t] <<- summary(factor2reg)$coef[2,2]
ts2[t] <<- summary(factor2reg)$coef[2,3]
ps2[t] <<- summary(factor2reg)$coef[2,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.marx <- matrix(nrow=9, ncol=3)
rownames(fails.marx) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0")
colnames(fails.marx) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
marxfunc(1000, bias1[t], bias2[t], precs[u])
estimates <- data.frame(estimates)
fails.marx[t,u] <- length(estimates$coefs1[(estimates$coefs1>0 & estimates$coefs2<0) & (estimates$ps1<0.1 & estimates$ps2<0.1)])
}
}
##### part 2: protestantism
### define master function
protfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (multiplicative)
for(i in 1:nrow(fulldata)) {
if(fulldata$protestant[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$protestant[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[4,1]
sds1[t] <<- summary(factor1reg)$coef[4,2]
ts1[t] <<- summary(factor1reg)$coef[4,3]
ps1[t] <<- summary(factor1reg)$coef[4,4]
coefs2[t] <<- summary(factor2reg)$coef[4,1]
sds2[t] <<- summary(factor2reg)$coef[4,2]
ts2[t] <<- summary(factor2reg)$coef[4,3]
ps2[t] <<- summary(factor2reg)$coef[4,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.prot <- matrix(nrow=9, ncol=3)
rownames(fails.prot) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35")
colnames(fails.prot) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
protfunc(1000, bias3[t], bias4[t], precs[u])
estimates <- data.frame(estimates)
fails.prot[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1])
}
}
##### part 3: catholicism
### define master function
cathfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (additive)
for(i in 1:nrow(fulldata)) {
if(fulldata$catholic[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$catholic[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[5,1]
sds1[t] <<- summary(factor1reg)$coef[5,2]
ts1[t] <<- summary(factor1reg)$coef[5,3]
ps1[t] <<- summary(factor1reg)$coef[5,4]
coefs2[t] <<- summary(factor2reg)$coef[5,1]
sds2[t] <<- summary(factor2reg)$coef[5,2]
ts2[t] <<- summary(factor2reg)$coef[5,3]
ps2[t] <<- summary(factor2reg)$coef[5,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.cath <- matrix(nrow=9, ncol=3)
rownames(fails.cath) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0")
colnames(fails.cath) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
cathfunc(1000, bias1[t], bias2[t], precs[u])
estimates <- data.frame(estimates)
fails.cath[t,u] <- length(estimates$coefs1[estimates$coefs1>0 & estimates$ps1<0.1 & estimates$ps2>0.1])
}
}
##### part 4: monarchy
### define master function
monfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (multiplicative)
for(i in 1:nrow(fulldata)) {
if(fulldata$monarchy[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$monarchy[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[7,1]
sds1[t] <<- summary(factor1reg)$coef[7,2]
ts1[t] <<- summary(factor1reg)$coef[7,3]
ps1[t] <<- summary(factor1reg)$coef[7,4]
coefs2[t] <<- summary(factor2reg)$coef[7,1]
sds2[t] <<- summary(factor2reg)$coef[7,2]
ts2[t] <<- summary(factor2reg)$coef[7,3]
ps2[t] <<- summary(factor2reg)$coef[7,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.mon <- matrix(nrow=9, ncol=3)
rownames(fails.mon) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35")
colnames(fails.mon) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
monfunc(1000, bias3[t], bias4[t], precs[u])
estimates <- data.frame(estimates)
fails.mon[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1])
}
}
##### part 5: show all results
fails.marx
fails.prot
fails.cath
fails.mon
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.