-
-
Save thiagomarzagao/c916e2a3ce77ea23d9a8 to your computer and use it in GitHub Desktop.
Code used to do the simulations explained on my paper.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### here I re-run Bollen & Paxton's regressions, but with simulated factor scores | |
library(MASS) | |
library(sem) | |
setwd(basedir) | |
bolpaxdata <- read.csv("~/Desktop/SCpaper/bollenpaxtondata.csv") # change path as needed | |
set.seed(10101957) # set seed to make exercise replicable | |
##### part 0: preliminary stuff (transform variables, specify CFA model, create subroutines) | |
### transform variables | |
bolpaxdata$coverage <- bolpaxdata$nyt + bolpaxdata$vanderbilt + bolpaxdata$facts | |
bolpaxdata$age <- 1980 - bolpaxdata$independence | |
bolpaxdata$coups <- bolpaxdata$coups76 + bolpaxdata$coups77 + bolpaxdata$coups78 + bolpaxdata$coups79 + bolpaxdata$coups80 | |
bolpaxdata$radiostvs <- bolpaxdata$radios + bolpaxdata$tvs | |
bolpaxdata$coverage[bolpaxdata$coverage==0] <- .001 | |
bolpaxdata$age[bolpaxdata$age<1] <- NA | |
bolpaxdata$lcoverage <- log(bolpaxdata$coverage) | |
bolpaxdata$lage <- log(bolpaxdata$age) | |
bolpaxdata$lradiostvs <- log(bolpaxdata$radiostvs) | |
bolpaxdata$lpopulation <- log(bolpaxdata$population) | |
bolpaxdata$larea <- log(bolpaxdata$area) | |
bolpaxdata$lenergy <- log(bolpaxdata$energy) | |
bolpaxdata$lprotests <- log(bolpaxdata$protests) | |
bolpaxdata$lstrikes <- log(bolpaxdata$strikes) | |
bolpaxdata$lriots <- log(bolpaxdata$riots) | |
### specify CFA model | |
model.cfadata <- specifyModel(quiet=TRUE) | |
edemoc -> indicator1, lambda_edemoc_indicator1, 69.87 | |
edemoc -> indicator2, lambda_edemoc_indicator2, 68.57 | |
edemoc -> indicator3, lambda_edemoc_indicator3, 53.45 | |
edemoc -> indicator4, lambda_edemoc_indicator4, 28.21 | |
edemoc -> indicator5, lambda_edemoc_indicator5, 40.63 | |
edemoc -> indicator6, lambda_edemoc_indicator6, 97.69 | |
edemoc -> indicator7, lambda_edemoc_indicator7, 21.17 | |
edemoc -> indicator8, lambda_edemoc_indicator8, 51.63 | |
edemoc -> indicator9, lambda_edemoc_indicator9, 26.09 | |
edemoc -> indicator10, lambda_edemoc_indicator10, 55.01 | |
edemoc -> indicator11, lambda_edemoc_indicator11, 63.13 | |
edemoc -> indicator12, lambda_edemoc_indicator12, 15.67 | |
erater1 -> indicator1, lambda_erater1_indicator1, 14.12 | |
erater1 -> indicator2, lambda_erater1_indicator2, 6.71 | |
erater1 -> indicator3, lambda_erater1_indicator3, 18.31 | |
erater1 -> indicator4, lambda_erater1_indicator4, 31.81 | |
erater1 -> indicator5, lambda_erater1_indicator5, 95.69 | |
erater1 -> indicator6, lambda_erater1_indicator6, 38.13 | |
erater2 -> indicator7, lambda_erater2_indicator7, 70.70 | |
erater2 -> indicator8, lambda_erater2_indicator8, 31.51 | |
erater2 -> indicator9, lambda_erater2_indicator9, 90.83 | |
erater2 -> indicator10, lambda_erater2_indicator10, 12.99 | |
erater2 -> indicator11, lambda_erater2_indicator11, 53.06 | |
erater2 -> indicator12, lambda_erater2_indicator12, 52.19 | |
edemoc <-> edemoc, NA, 33.33 | |
erater1 <-> erater1, NA, 25 | |
erater2 <-> erater2, NA, 225 | |
edemoc <-> erater1, NA, 0 | |
edemoc <-> erater2, NA, 0 | |
erater1 <-> erater2, NA, 0 | |
indicator1 <-> indicator1, NA, 673531.2803 | |
indicator2 <-> indicator2, NA, 17596739.13 | |
indicator3 <-> indicator3, NA, 46940.98728 | |
indicator4 <-> indicator4, NA, 10422468.383 | |
indicator5 <-> indicator5, NA, 3192325.784 | |
indicator6 <-> indicator6, NA, 700590.022 | |
indicator7 <-> indicator7, NA, 1976572.886 | |
indicator8 <-> indicator8, NA, 9677149.536 | |
indicator9 <-> indicator9, NA, 1305505.098 | |
indicator10 <-> indicator10, NA, 13204.38979 | |
indicator11 <-> indicator11, NA, 581054.330 | |
indicator12 <-> indicator12, NA, 14681009.32 | |
### create subroutines | |
sub1 <- function(n) { # create empty objects to store estimates | |
coefs1 <<- vector(length=n) | |
sds1 <<- vector(length=n) | |
ts1 <<- vector(length=n) | |
ps1 <<- vector(length=n) | |
coefs2 <<- vector(length=n) | |
sds2 <<- vector(length=n) | |
ts2 <<- vector(length=n) | |
ps2 <<- vector(length=n) | |
} | |
sub2 <- function() { # generate simulated factors | |
democ <<- runif(n=186, min=0, max=20) | |
rater1 <<- rnorm(n=186, mean=5, sd=5) | |
rater2 <<- rnorm(n=186, mean=5, sd=15) | |
factors <<- cbind(democ, rater1, rater2) | |
fulldata <<- cbind(bolpaxdata, factors) | |
} | |
sub3 <- function() { # generate simulated errors | |
error1 <<- rnorm(n=186, mean=0, sd=827) + rbeta(n=186, shape1=0.68, shape2=0.78) | |
error2 <<- rnorm(n=186, mean=0, sd=4188) + rbeta(n=186, shape1=0.10, shape2=0.72) | |
error3 <<- rnorm(n=186, mean=0, sd=228) + rbeta(n=186, shape1=0.58, shape2=0.67) | |
error4 <<- rnorm(n=186, mean=0, sd=3237) + rbeta(n=186, shape1=0.50, shape2=0.60) | |
error5 <<- rnorm(n=186, mean=0, sd=1965) + rbeta(n=186, shape1=0.06, shape2=0.83) | |
error6 <<- rnorm(n=186, mean=0, sd=734) + rbeta(n=186, shape1=0.15, shape2=0.86) | |
error7 <<- rnorm(n=186, mean=0, sd=1439) + rbeta(n=186, shape1=0.51, shape2=0.46) | |
error8 <<- rnorm(n=186, mean=0, sd=2983) + rbeta(n=186, shape1=0.23, shape2=0.40) | |
error9 <<- rnorm(n=186, mean=0, sd=1190) + rbeta(n=186, shape1=0.73, shape2=0.21) | |
error10 <<- rnorm(n=186, mean=0, sd=112) + rbeta(n=186, shape1=0.26, shape2=0.63) | |
error11 <<- rnorm(n=186, mean=0, sd=806) + rbeta(n=186, shape1=0.54, shape2=0.37) | |
error12 <<- rnorm(n=186, mean=0, sd=4299) + rbeta(n=186, shape1=0.13, shape2=0.46) | |
errors <<- cbind(error1, error2, error3, error4, error5, error6, error7, error8, error9, error10, error11, error12) | |
fulldata <<- cbind(fulldata, errors) | |
} | |
sub4 <- function(prec=1, ...) { # generate simulated indicators | |
indicator1 <<- (14.12 * fulldata$rater1) + (69.87 * democ) + error1/prec | |
indicator2 <<- (6.71 * fulldata$rater1) + (68.57 * democ) + error2/prec | |
indicator3 <<- (18.31 * fulldata$rater1) + (53.45 * democ) + error3/prec | |
indicator4 <<- (31.81 * fulldata$rater1) + (28.21 * democ) + error4/prec | |
indicator5 <<- (95.69 * fulldata$rater1) + (40.63 * democ) + error5/prec | |
indicator6 <<- (38.13 * fulldata$rater1) + (97.69 * democ) + error6/prec | |
indicator7 <<- (70.70 * fulldata$rater2) + (21.17 * democ) + error7/prec | |
indicator8 <<- (31.51 * fulldata$rater2) + (51.63 * democ) + error8/prec | |
indicator9 <<- (90.83 * fulldata$rater2) + (26.09 * democ) + error9/prec | |
indicator10 <<- (12.99 * fulldata$rater2) + (55.01 * democ) + error10/prec | |
indicator11 <<- (53.06 * fulldata$rater2) + (63.13 * democ) + error11/prec | |
indicator12 <<- (52.19 * fulldata$rater2) + (15.67 * democ) + error12/prec | |
indicators <<- cbind(indicator1, indicator2, indicator3, indicator4, indicator5, indicator6, indicator7, indicator8, indicator9, indicator10, indicator11, indicator12) | |
fulldata <<- cbind(fulldata, indicators) | |
} | |
sub5 <- function() { # run CFA model | |
cfadata <<- fulldata[c(55:66)] | |
cfadata.cov <<- cov(cfadata) | |
cfadata.sem <<- sem(model.cfadata, cfadata.cov, nrow(cfadata)) | |
fulldata <<- cbind(fulldata, fscores(cfadata.sem,data=cfadata)) | |
} | |
sub6 <- function() { # regress factor scores on country-level variables | |
factor1reg <<- lm(fulldata$erater1 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs) | |
factor2reg <<- lm(fulldata$erater2 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs) | |
} | |
sub7 <- function() { # collect estimates | |
coefs1 <<- coefs1 | |
sds1 <<- sds1 | |
ts1 <<- ts1 | |
ps1 <<- ps1 | |
coefs2 <<- coefs2 | |
sds2 <<- sds2 | |
ts2 <<- ts2 | |
ps2 <<- ps2 | |
estimates <<- cbind(coefs1, sds1, ts1, ps1, coefs2, sds2, ts2, ps2) | |
} | |
### set bias and precision levels | |
## set bias levels | |
bias1 = c(20, 15, 10, 7, 5, 3, 1, 0.5, 0) | |
bias2 = append(rep(c(0.025), 8), 0) | |
bias3 = c(1, 1.3, 1.5, 1.7, 2.3, 2.5, 2.7, 2.9, 3.3) | |
bias4 = c(1, 1.35, 1.55, 1.75, 2.35, 2.55, 2.75, 2.95, 3.35) | |
## set precision levels | |
precs = c(1, 0.001, 1000) | |
##### part 1: marxism | |
### define master function | |
marxfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) { | |
sub1(n) # create empty objects to store estimates | |
for(t in 1:n) { | |
sub2() # generate simulated factors | |
# introduce systematic bias (additive) | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$marxist[i] == 1) { | |
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1 | |
} | |
} | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$marxist[i] == 1) { | |
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2 | |
} | |
} | |
sub3() # generate simulated errors | |
sub4() # generate simulated indicators | |
sub5() # run CFA model | |
sub6() # regress factor scores on country-level variables | |
# extract estimates | |
coefs1[t] <<- summary(factor1reg)$coef[2,1] | |
sds1[t] <<- summary(factor1reg)$coef[2,2] | |
ts1[t] <<- summary(factor1reg)$coef[2,3] | |
ps1[t] <<- summary(factor1reg)$coef[2,4] | |
coefs2[t] <<- summary(factor2reg)$coef[2,1] | |
sds2[t] <<- summary(factor2reg)$coef[2,2] | |
ts2[t] <<- summary(factor2reg)$coef[2,3] | |
ps2[t] <<- summary(factor2reg)$coef[2,4] | |
} | |
sub7() # collect estimates | |
} | |
### run master function | |
## create container to store results | |
fails.marx <- matrix(nrow=9, ncol=3) | |
rownames(fails.marx) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0") | |
colnames(fails.marx) <- c("prec=1", "prec=0.001", "prec=1000") | |
## compute estimates | |
for(t in 1:9) { | |
print(paste("Batch", t)) | |
for(u in 1:3) { | |
marxfunc(1000, bias1[t], bias2[t], precs[u]) | |
estimates <- data.frame(estimates) | |
fails.marx[t,u] <- length(estimates$coefs1[(estimates$coefs1>0 & estimates$coefs2<0) & (estimates$ps1<0.1 & estimates$ps2<0.1)]) | |
} | |
} | |
##### part 2: protestantism | |
### define master function | |
protfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) { | |
sub1(n) # create empty objects to store estimates | |
for(t in 1:n) { | |
sub2() # generate simulated factors | |
# introduce systematic bias (multiplicative) | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$protestant[i] == 1) { | |
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1 | |
} | |
} | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$protestant[i] == 1) { | |
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2 | |
} | |
} | |
sub3() # generate simulated errors | |
sub4() # generate simulated indicators | |
sub5() # run CFA model | |
sub6() # regress factor scores on country-level variables | |
# extract estimates | |
coefs1[t] <<- summary(factor1reg)$coef[4,1] | |
sds1[t] <<- summary(factor1reg)$coef[4,2] | |
ts1[t] <<- summary(factor1reg)$coef[4,3] | |
ps1[t] <<- summary(factor1reg)$coef[4,4] | |
coefs2[t] <<- summary(factor2reg)$coef[4,1] | |
sds2[t] <<- summary(factor2reg)$coef[4,2] | |
ts2[t] <<- summary(factor2reg)$coef[4,3] | |
ps2[t] <<- summary(factor2reg)$coef[4,4] | |
} | |
sub7() # collect estimates | |
} | |
### run master function | |
## create container to store results | |
fails.prot <- matrix(nrow=9, ncol=3) | |
rownames(fails.prot) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35") | |
colnames(fails.prot) <- c("prec=1", "prec=0.001", "prec=1000") | |
## compute estimates | |
for(t in 1:9) { | |
print(paste("Batch", t)) | |
for(u in 1:3) { | |
protfunc(1000, bias3[t], bias4[t], precs[u]) | |
estimates <- data.frame(estimates) | |
fails.prot[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1]) | |
} | |
} | |
##### part 3: catholicism | |
### define master function | |
cathfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) { | |
sub1(n) # create empty objects to store estimates | |
for(t in 1:n) { | |
sub2() # generate simulated factors | |
# introduce systematic bias (additive) | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$catholic[i] == 1) { | |
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1 | |
} | |
} | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$catholic[i] == 1) { | |
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2 | |
} | |
} | |
sub3() # generate simulated errors | |
sub4() # generate simulated indicators | |
sub5() # run CFA model | |
sub6() # regress factor scores on country-level variables | |
# extract estimates | |
coefs1[t] <<- summary(factor1reg)$coef[5,1] | |
sds1[t] <<- summary(factor1reg)$coef[5,2] | |
ts1[t] <<- summary(factor1reg)$coef[5,3] | |
ps1[t] <<- summary(factor1reg)$coef[5,4] | |
coefs2[t] <<- summary(factor2reg)$coef[5,1] | |
sds2[t] <<- summary(factor2reg)$coef[5,2] | |
ts2[t] <<- summary(factor2reg)$coef[5,3] | |
ps2[t] <<- summary(factor2reg)$coef[5,4] | |
} | |
sub7() # collect estimates | |
} | |
### run master function | |
## create container to store results | |
fails.cath <- matrix(nrow=9, ncol=3) | |
rownames(fails.cath) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0") | |
colnames(fails.cath) <- c("prec=1", "prec=0.001", "prec=1000") | |
## compute estimates | |
for(t in 1:9) { | |
print(paste("Batch", t)) | |
for(u in 1:3) { | |
cathfunc(1000, bias1[t], bias2[t], precs[u]) | |
estimates <- data.frame(estimates) | |
fails.cath[t,u] <- length(estimates$coefs1[estimates$coefs1>0 & estimates$ps1<0.1 & estimates$ps2>0.1]) | |
} | |
} | |
##### part 4: monarchy | |
### define master function | |
monfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) { | |
sub1(n) # create empty objects to store estimates | |
for(t in 1:n) { | |
sub2() # generate simulated factors | |
# introduce systematic bias (multiplicative) | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$monarchy[i] == 1) { | |
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1 | |
} | |
} | |
for(i in 1:nrow(fulldata)) { | |
if(fulldata$monarchy[i] == 1) { | |
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2 | |
} | |
} | |
sub3() # generate simulated errors | |
sub4() # generate simulated indicators | |
sub5() # run CFA model | |
sub6() # regress factor scores on country-level variables | |
# extract estimates | |
coefs1[t] <<- summary(factor1reg)$coef[7,1] | |
sds1[t] <<- summary(factor1reg)$coef[7,2] | |
ts1[t] <<- summary(factor1reg)$coef[7,3] | |
ps1[t] <<- summary(factor1reg)$coef[7,4] | |
coefs2[t] <<- summary(factor2reg)$coef[7,1] | |
sds2[t] <<- summary(factor2reg)$coef[7,2] | |
ts2[t] <<- summary(factor2reg)$coef[7,3] | |
ps2[t] <<- summary(factor2reg)$coef[7,4] | |
} | |
sub7() # collect estimates | |
} | |
### run master function | |
## create container to store results | |
fails.mon <- matrix(nrow=9, ncol=3) | |
rownames(fails.mon) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35") | |
colnames(fails.mon) <- c("prec=1", "prec=0.001", "prec=1000") | |
## compute estimates | |
for(t in 1:9) { | |
print(paste("Batch", t)) | |
for(u in 1:3) { | |
monfunc(1000, bias3[t], bias4[t], precs[u]) | |
estimates <- data.frame(estimates) | |
fails.mon[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1]) | |
} | |
} | |
##### part 5: show all results | |
fails.marx | |
fails.prot | |
fails.cath | |
fails.mon |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment