Skip to content

Instantly share code, notes, and snippets.

@thiagomarzagao
Last active January 3, 2016 06:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thiagomarzagao/c916e2a3ce77ea23d9a8 to your computer and use it in GitHub Desktop.
Save thiagomarzagao/c916e2a3ce77ea23d9a8 to your computer and use it in GitHub Desktop.
Code used to do the simulations explained on my paper.
##### here I re-run Bollen & Paxton's regressions, but with simulated factor scores
library(MASS)
library(sem)
setwd(basedir)
bolpaxdata <- read.csv("~/Desktop/SCpaper/bollenpaxtondata.csv") # change path as needed
set.seed(10101957) # set seed to make exercise replicable
##### part 0: preliminary stuff (transform variables, specify CFA model, create subroutines)
### transform variables
bolpaxdata$coverage <- bolpaxdata$nyt + bolpaxdata$vanderbilt + bolpaxdata$facts
bolpaxdata$age <- 1980 - bolpaxdata$independence
bolpaxdata$coups <- bolpaxdata$coups76 + bolpaxdata$coups77 + bolpaxdata$coups78 + bolpaxdata$coups79 + bolpaxdata$coups80
bolpaxdata$radiostvs <- bolpaxdata$radios + bolpaxdata$tvs
bolpaxdata$coverage[bolpaxdata$coverage==0] <- .001
bolpaxdata$age[bolpaxdata$age<1] <- NA
bolpaxdata$lcoverage <- log(bolpaxdata$coverage)
bolpaxdata$lage <- log(bolpaxdata$age)
bolpaxdata$lradiostvs <- log(bolpaxdata$radiostvs)
bolpaxdata$lpopulation <- log(bolpaxdata$population)
bolpaxdata$larea <- log(bolpaxdata$area)
bolpaxdata$lenergy <- log(bolpaxdata$energy)
bolpaxdata$lprotests <- log(bolpaxdata$protests)
bolpaxdata$lstrikes <- log(bolpaxdata$strikes)
bolpaxdata$lriots <- log(bolpaxdata$riots)
### specify CFA model
model.cfadata <- specifyModel(quiet=TRUE)
edemoc -> indicator1, lambda_edemoc_indicator1, 69.87
edemoc -> indicator2, lambda_edemoc_indicator2, 68.57
edemoc -> indicator3, lambda_edemoc_indicator3, 53.45
edemoc -> indicator4, lambda_edemoc_indicator4, 28.21
edemoc -> indicator5, lambda_edemoc_indicator5, 40.63
edemoc -> indicator6, lambda_edemoc_indicator6, 97.69
edemoc -> indicator7, lambda_edemoc_indicator7, 21.17
edemoc -> indicator8, lambda_edemoc_indicator8, 51.63
edemoc -> indicator9, lambda_edemoc_indicator9, 26.09
edemoc -> indicator10, lambda_edemoc_indicator10, 55.01
edemoc -> indicator11, lambda_edemoc_indicator11, 63.13
edemoc -> indicator12, lambda_edemoc_indicator12, 15.67
erater1 -> indicator1, lambda_erater1_indicator1, 14.12
erater1 -> indicator2, lambda_erater1_indicator2, 6.71
erater1 -> indicator3, lambda_erater1_indicator3, 18.31
erater1 -> indicator4, lambda_erater1_indicator4, 31.81
erater1 -> indicator5, lambda_erater1_indicator5, 95.69
erater1 -> indicator6, lambda_erater1_indicator6, 38.13
erater2 -> indicator7, lambda_erater2_indicator7, 70.70
erater2 -> indicator8, lambda_erater2_indicator8, 31.51
erater2 -> indicator9, lambda_erater2_indicator9, 90.83
erater2 -> indicator10, lambda_erater2_indicator10, 12.99
erater2 -> indicator11, lambda_erater2_indicator11, 53.06
erater2 -> indicator12, lambda_erater2_indicator12, 52.19
edemoc <-> edemoc, NA, 33.33
erater1 <-> erater1, NA, 25
erater2 <-> erater2, NA, 225
edemoc <-> erater1, NA, 0
edemoc <-> erater2, NA, 0
erater1 <-> erater2, NA, 0
indicator1 <-> indicator1, NA, 673531.2803
indicator2 <-> indicator2, NA, 17596739.13
indicator3 <-> indicator3, NA, 46940.98728
indicator4 <-> indicator4, NA, 10422468.383
indicator5 <-> indicator5, NA, 3192325.784
indicator6 <-> indicator6, NA, 700590.022
indicator7 <-> indicator7, NA, 1976572.886
indicator8 <-> indicator8, NA, 9677149.536
indicator9 <-> indicator9, NA, 1305505.098
indicator10 <-> indicator10, NA, 13204.38979
indicator11 <-> indicator11, NA, 581054.330
indicator12 <-> indicator12, NA, 14681009.32
### create subroutines
sub1 <- function(n) { # create empty objects to store estimates
coefs1 <<- vector(length=n)
sds1 <<- vector(length=n)
ts1 <<- vector(length=n)
ps1 <<- vector(length=n)
coefs2 <<- vector(length=n)
sds2 <<- vector(length=n)
ts2 <<- vector(length=n)
ps2 <<- vector(length=n)
}
sub2 <- function() { # generate simulated factors
democ <<- runif(n=186, min=0, max=20)
rater1 <<- rnorm(n=186, mean=5, sd=5)
rater2 <<- rnorm(n=186, mean=5, sd=15)
factors <<- cbind(democ, rater1, rater2)
fulldata <<- cbind(bolpaxdata, factors)
}
sub3 <- function() { # generate simulated errors
error1 <<- rnorm(n=186, mean=0, sd=827) + rbeta(n=186, shape1=0.68, shape2=0.78)
error2 <<- rnorm(n=186, mean=0, sd=4188) + rbeta(n=186, shape1=0.10, shape2=0.72)
error3 <<- rnorm(n=186, mean=0, sd=228) + rbeta(n=186, shape1=0.58, shape2=0.67)
error4 <<- rnorm(n=186, mean=0, sd=3237) + rbeta(n=186, shape1=0.50, shape2=0.60)
error5 <<- rnorm(n=186, mean=0, sd=1965) + rbeta(n=186, shape1=0.06, shape2=0.83)
error6 <<- rnorm(n=186, mean=0, sd=734) + rbeta(n=186, shape1=0.15, shape2=0.86)
error7 <<- rnorm(n=186, mean=0, sd=1439) + rbeta(n=186, shape1=0.51, shape2=0.46)
error8 <<- rnorm(n=186, mean=0, sd=2983) + rbeta(n=186, shape1=0.23, shape2=0.40)
error9 <<- rnorm(n=186, mean=0, sd=1190) + rbeta(n=186, shape1=0.73, shape2=0.21)
error10 <<- rnorm(n=186, mean=0, sd=112) + rbeta(n=186, shape1=0.26, shape2=0.63)
error11 <<- rnorm(n=186, mean=0, sd=806) + rbeta(n=186, shape1=0.54, shape2=0.37)
error12 <<- rnorm(n=186, mean=0, sd=4299) + rbeta(n=186, shape1=0.13, shape2=0.46)
errors <<- cbind(error1, error2, error3, error4, error5, error6, error7, error8, error9, error10, error11, error12)
fulldata <<- cbind(fulldata, errors)
}
sub4 <- function(prec=1, ...) { # generate simulated indicators
indicator1 <<- (14.12 * fulldata$rater1) + (69.87 * democ) + error1/prec
indicator2 <<- (6.71 * fulldata$rater1) + (68.57 * democ) + error2/prec
indicator3 <<- (18.31 * fulldata$rater1) + (53.45 * democ) + error3/prec
indicator4 <<- (31.81 * fulldata$rater1) + (28.21 * democ) + error4/prec
indicator5 <<- (95.69 * fulldata$rater1) + (40.63 * democ) + error5/prec
indicator6 <<- (38.13 * fulldata$rater1) + (97.69 * democ) + error6/prec
indicator7 <<- (70.70 * fulldata$rater2) + (21.17 * democ) + error7/prec
indicator8 <<- (31.51 * fulldata$rater2) + (51.63 * democ) + error8/prec
indicator9 <<- (90.83 * fulldata$rater2) + (26.09 * democ) + error9/prec
indicator10 <<- (12.99 * fulldata$rater2) + (55.01 * democ) + error10/prec
indicator11 <<- (53.06 * fulldata$rater2) + (63.13 * democ) + error11/prec
indicator12 <<- (52.19 * fulldata$rater2) + (15.67 * democ) + error12/prec
indicators <<- cbind(indicator1, indicator2, indicator3, indicator4, indicator5, indicator6, indicator7, indicator8, indicator9, indicator10, indicator11, indicator12)
fulldata <<- cbind(fulldata, indicators)
}
sub5 <- function() { # run CFA model
cfadata <<- fulldata[c(55:66)]
cfadata.cov <<- cov(cfadata)
cfadata.sem <<- sem(model.cfadata, cfadata.cov, nrow(cfadata))
fulldata <<- cbind(fulldata, fscores(cfadata.sem,data=cfadata))
}
sub6 <- function() { # regress factor scores on country-level variables
factor1reg <<- lm(fulldata$erater1 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs)
factor2reg <<- lm(fulldata$erater2 ~ fulldata$marxist + fulldata$lenergy + fulldata$protestant + fulldata$catholic + fulldata$lage + fulldata$monarchy + fulldata$coups + fulldata$war + fulldata$lprotests + fulldata$lstrikes + fulldata$lriots + fulldata$lcoverage + fulldata$lpopulation + fulldata$larea + fulldata$lradiostvs)
}
sub7 <- function() { # collect estimates
coefs1 <<- coefs1
sds1 <<- sds1
ts1 <<- ts1
ps1 <<- ps1
coefs2 <<- coefs2
sds2 <<- sds2
ts2 <<- ts2
ps2 <<- ps2
estimates <<- cbind(coefs1, sds1, ts1, ps1, coefs2, sds2, ts2, ps2)
}
### set bias and precision levels
## set bias levels
bias1 = c(20, 15, 10, 7, 5, 3, 1, 0.5, 0)
bias2 = append(rep(c(0.025), 8), 0)
bias3 = c(1, 1.3, 1.5, 1.7, 2.3, 2.5, 2.7, 2.9, 3.3)
bias4 = c(1, 1.35, 1.55, 1.75, 2.35, 2.55, 2.75, 2.95, 3.35)
## set precision levels
precs = c(1, 0.001, 1000)
##### part 1: marxism
### define master function
marxfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (additive)
for(i in 1:nrow(fulldata)) {
if(fulldata$marxist[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$marxist[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[2,1]
sds1[t] <<- summary(factor1reg)$coef[2,2]
ts1[t] <<- summary(factor1reg)$coef[2,3]
ps1[t] <<- summary(factor1reg)$coef[2,4]
coefs2[t] <<- summary(factor2reg)$coef[2,1]
sds2[t] <<- summary(factor2reg)$coef[2,2]
ts2[t] <<- summary(factor2reg)$coef[2,3]
ps2[t] <<- summary(factor2reg)$coef[2,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.marx <- matrix(nrow=9, ncol=3)
rownames(fails.marx) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0")
colnames(fails.marx) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
marxfunc(1000, bias1[t], bias2[t], precs[u])
estimates <- data.frame(estimates)
fails.marx[t,u] <- length(estimates$coefs1[(estimates$coefs1>0 & estimates$coefs2<0) & (estimates$ps1<0.1 & estimates$ps2<0.1)])
}
}
##### part 2: protestantism
### define master function
protfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (multiplicative)
for(i in 1:nrow(fulldata)) {
if(fulldata$protestant[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$protestant[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[4,1]
sds1[t] <<- summary(factor1reg)$coef[4,2]
ts1[t] <<- summary(factor1reg)$coef[4,3]
ps1[t] <<- summary(factor1reg)$coef[4,4]
coefs2[t] <<- summary(factor2reg)$coef[4,1]
sds2[t] <<- summary(factor2reg)$coef[4,2]
ts2[t] <<- summary(factor2reg)$coef[4,3]
ps2[t] <<- summary(factor2reg)$coef[4,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.prot <- matrix(nrow=9, ncol=3)
rownames(fails.prot) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35")
colnames(fails.prot) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
protfunc(1000, bias3[t], bias4[t], precs[u])
estimates <- data.frame(estimates)
fails.prot[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1])
}
}
##### part 3: catholicism
### define master function
cathfunc <- function(n=1, bias1=0, bias2=0, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (additive)
for(i in 1:nrow(fulldata)) {
if(fulldata$catholic[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] + bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$catholic[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] + bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[5,1]
sds1[t] <<- summary(factor1reg)$coef[5,2]
ts1[t] <<- summary(factor1reg)$coef[5,3]
ps1[t] <<- summary(factor1reg)$coef[5,4]
coefs2[t] <<- summary(factor2reg)$coef[5,1]
sds2[t] <<- summary(factor2reg)$coef[5,2]
ts2[t] <<- summary(factor2reg)$coef[5,3]
ps2[t] <<- summary(factor2reg)$coef[5,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.cath <- matrix(nrow=9, ncol=3)
rownames(fails.cath) <- c("bias1=20; bias2=0.025", "bias1=15; bias2=0.025", "bias1=10; bias2=0.025", "bias1=7; bias2=0.025", "bias1=5; bias2=0.025", "bias1=3; bias2=0.025", "bias1=1; bias2=0.025", "bias1=.5; bias2=0.025", "bias1=0; bias2=0")
colnames(fails.cath) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
cathfunc(1000, bias1[t], bias2[t], precs[u])
estimates <- data.frame(estimates)
fails.cath[t,u] <- length(estimates$coefs1[estimates$coefs1>0 & estimates$ps1<0.1 & estimates$ps2>0.1])
}
}
##### part 4: monarchy
### define master function
monfunc <- function(n=1, bias1=1, bias2=1, prec=1, ...) {
sub1(n) # create empty objects to store estimates
for(t in 1:n) {
sub2() # generate simulated factors
# introduce systematic bias (multiplicative)
for(i in 1:nrow(fulldata)) {
if(fulldata$monarchy[i] == 1) {
fulldata$rater1[i] <<- fulldata$rater1[i] * bias1
}
}
for(i in 1:nrow(fulldata)) {
if(fulldata$monarchy[i] == 1) {
fulldata$rater2[i] <<- fulldata$rater2[i] * bias2
}
}
sub3() # generate simulated errors
sub4() # generate simulated indicators
sub5() # run CFA model
sub6() # regress factor scores on country-level variables
# extract estimates
coefs1[t] <<- summary(factor1reg)$coef[7,1]
sds1[t] <<- summary(factor1reg)$coef[7,2]
ts1[t] <<- summary(factor1reg)$coef[7,3]
ps1[t] <<- summary(factor1reg)$coef[7,4]
coefs2[t] <<- summary(factor2reg)$coef[7,1]
sds2[t] <<- summary(factor2reg)$coef[7,2]
ts2[t] <<- summary(factor2reg)$coef[7,3]
ps2[t] <<- summary(factor2reg)$coef[7,4]
}
sub7() # collect estimates
}
### run master function
## create container to store results
fails.mon <- matrix(nrow=9, ncol=3)
rownames(fails.mon) <- c("bias1=bias2=1", "bias1=1.3; bias2=1.35", "bias1=1.5; bias2=1.55", "bias1=1.7; bias2=1.75", "bias1=2.3; bias2=2.35", "bias1=2.5; bias2=2.55", "bias1=2.7; bias2=2.75", "bias1=2.9; bias2=2.95", "bias1=3.3; bias2=3.35")
colnames(fails.mon) <- c("prec=1", "prec=0.001", "prec=1000")
## compute estimates
for(t in 1:9) {
print(paste("Batch", t))
for(u in 1:3) {
monfunc(1000, bias3[t], bias4[t], precs[u])
estimates <- data.frame(estimates)
fails.mon[t,u] <- length(estimates$ps1[estimates$ps1>0.1 & estimates$ps2>0.1])
}
}
##### part 5: show all results
fails.marx
fails.prot
fails.cath
fails.mon
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment