Skip to content

Instantly share code, notes, and snippets.

@Jsevillamol
Created June 22, 2020 18:15
Show Gist options
  • Save Jsevillamol/41ef7055118e673e45df22b756c7a6b3 to your computer and use it in GitHub Desktop.
Save Jsevillamol/41ef7055118e673e45df22b756c7a6b3 to your computer and use it in GitHub Desktop.
library(haven)
library(dplyr)
library(lfe)
# Inline string concatenation
`%+%` <- function(a, b) paste(a, b, sep="")
# Moran statistic
library(ape)
my_moran <- function(residuals_input, coordinates){
ww <- as.matrix(dist(coordinates))
ww <- 1/ww
diag(ww) <- 0
ww[which(!is.finite(ww))] <- 0
Moran.I(residuals_input, ww, na.rm = TRUE)
}
# Choose variable to run code on and controls
outcome = "ilf"
exposure = "plow_m"
individual_controls =
" + age + age_sq + high_sc + less_hs" %+%
" + my_data$single + factor(year) + factor(metro)"
historical_country_controls =
" + agricultural_suitability_m + tropical_climate_m + large_animals_m" %+%
" + economic_complexity_m + political_hierarchies_m"
contemporary_country_controls = " + ln_income_m + ln_income2m"
controls =
" + factor(statefip)" %+%
individual_controls %+%
historical_country_controls %+%
contemporary_country_controls
# Prepare data
my_data <- read_dta("R/datasets/alesina_giuliano_nunn_qje_2013_replication_materials/Replication_Materials/cps_dataset.dta")
my_data <- my_data[complete.cases(pull(my_data, exposure)),]
## Add state latitude and longitude
gaz_counties_national <- read.delim("~/R/datasets/2019_Gaz_counties_national.txt")
gaz_counties_national$statefip <- floor(gaz_counties_national$GEOID/1000)
gaz_counties_national <- gaz_counties_national %>% select(c(statefip, INTPTLAT, INTPTLONG))
gaz_counties_national <- aggregate(gaz_counties_national, by=list(gaz_counties_national$statefip), FUN=mean)
my_data <- merge(x=my_data, y=gaz_counties_national, by.x="statefip", by.y="statefip", all.x=TRUE)
# Run regression
f = outcome %+% " ~ " %+% exposure %+% controls %+% " | 0 | 0 | mbpl"
my_lm <- felm(as.formula(f), data=my_data, na.action=na.exclude)
# Get some regression statistics
n <- nobs(my_lm)
R2 <- summary(my_lm)$adj.r.squared
# Compute standardized beta
r <- coef(summary(my_lm))[exposure, "Estimate"]
s <- coef(summary(my_lm))[exposure, "Cluster s.e."]
p <- coef(summary(my_lm))[exposure, 'Pr(>|t|)']
sigma_x <- sd(pull(my_data, exposure), na.rm=TRUE)
sigma_y <- sd(pull(my_data, outcome), na.rm=TRUE)
cohen_d <- r / sigma_y
cohen_ds <- s / sigma_y
r_adjusted <- r * sigma_x / sigma_y
s_adjusted <- s * sigma_x / sigma_y
# Compute Moran
## Redo OLS without clusters
f = outcome %+% " ~ " %+% exposure %+% controls
my_lm <- lm(as.formula(f), data=my_data, na.action=na.exclude)
## Add residuals to data
my_data$my_residuals <- resid(my_lm)
## Prune omitted values
omitted <- c(my_lm$na.action)
pruned_data <- my_data[-omitted, ]
## Select relevant columns
relevant_data <- pruned_data %>% select(c(INTPTLAT, INTPTLONG, my_residuals))
## Aggregate data by ethnicity
aggdata <-aggregate(relevant_data, by=list(pruned_data$mbpl), FUN=mean, na.rm=TRUE)
coords <- cbind(aggdata$INTPTLAT, aggdata$INTPTLONG)
moran_out <- my_moran(aggdata$my_residuals, coords)
moran_z <- (moran_out$observed - moran_out$expected) / moran_out$sd
moran_p <- moran_out$p.value
# Print results
print(sprintf('outcome = ' %+% outcome))
print(sprintf('exposure = ' %+% exposure))
print(sprintf('n = %d', n))
print(sprintf('r = %.2f (%.2f)', r, s))
print(sprintf('d = %.2f (%.2f)', cohen_d, cohen_ds))
print(sprintf('beta = %.2f (%.2f)', r_adjusted, s_adjusted))
print(sprintf('p = %.2f', p))
print(sprintf('Adjusted R2 = %.2f', R2))
print(sprintf("Moran's Z = %.2f", moran_z))
print(sprintf("Moran's p = %.2f", moran_p))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment