Jsevillamol/alesina_2013.R

## alesina_2013.R
library(haven)
library(dplyr)
library(lfe)

# Inline string concatenation
`%+%` <- function(a, b) paste(a, b, sep="")

# Moran statistic
library(ape)
my_moran <- function(residuals_input, coordinates){
  ww <- as.matrix(dist(coordinates))
  ww <- 1/ww
  diag(ww) <- 0
  ww[which(!is.finite(ww))] <- 0

  Moran.I(residuals_input, ww, na.rm = TRUE)
}

# Choose variable to run code on and controls
outcome = "ilf"
exposure = "plow_m"

individual_controls =
  " + age + age_sq + high_sc + less_hs" %+%
  " + my_data$single +  factor(year) + factor(metro)"

historical_country_controls =
  " + agricultural_suitability_m + tropical_climate_m + large_animals_m" %+%
  " + economic_complexity_m + political_hierarchies_m"

contemporary_country_controls = " + ln_income_m + ln_income2m"

controls =
  " + factor(statefip)"         %+%
  individual_controls           %+%
  historical_country_controls   %+%
  contemporary_country_controls

# Prepare data
my_data <- read_dta("R/datasets/alesina_giuliano_nunn_qje_2013_replication_materials/Replication_Materials/cps_dataset.dta")
my_data <- my_data[complete.cases(pull(my_data, exposure)),]

## Add state latitude and longitude
gaz_counties_national <- read.delim("~/R/datasets/2019_Gaz_counties_national.txt")

gaz_counties_national$statefip <- floor(gaz_counties_national$GEOID/1000)
gaz_counties_national <- gaz_counties_national %>% select(c(statefip, INTPTLAT, INTPTLONG))
gaz_counties_national <- aggregate(gaz_counties_national, by=list(gaz_counties_national$statefip), FUN=mean)
my_data <- merge(x=my_data, y=gaz_counties_national, by.x="statefip", by.y="statefip", all.x=TRUE)

# Run regression
f = outcome %+% " ~ " %+% exposure %+% controls %+% " | 0 | 0 | mbpl"
my_lm <- felm(as.formula(f), data=my_data, na.action=na.exclude)

# Get some regression statistics
n <- nobs(my_lm)
R2 <- summary(my_lm)$adj.r.squared

# Compute standardized beta
r <- coef(summary(my_lm))[exposure, "Estimate"]
s <- coef(summary(my_lm))[exposure, "Cluster s.e."]
p <- coef(summary(my_lm))[exposure, 'Pr(>|t|)']

sigma_x <- sd(pull(my_data, exposure), na.rm=TRUE)
sigma_y <- sd(pull(my_data, outcome), na.rm=TRUE)

cohen_d <- r / sigma_y
cohen_ds <- s / sigma_y

r_adjusted <- r * sigma_x / sigma_y
s_adjusted <- s * sigma_x / sigma_y

# Compute Moran
## Redo OLS without clusters
f = outcome %+% " ~ " %+% exposure %+% controls
my_lm <- lm(as.formula(f), data=my_data, na.action=na.exclude)

## Add residuals to data
my_data$my_residuals <- resid(my_lm)

## Prune omitted values
omitted <- c(my_lm$na.action)
pruned_data <- my_data[-omitted, ]

## Select relevant columns
relevant_data <- pruned_data %>% select(c(INTPTLAT, INTPTLONG, my_residuals))

## Aggregate data by ethnicity
aggdata <-aggregate(relevant_data, by=list(pruned_data$mbpl), FUN=mean, na.rm=TRUE)

coords <- cbind(aggdata$INTPTLAT, aggdata$INTPTLONG)

moran_out <- my_moran(aggdata$my_residuals, coords)
moran_z <- (moran_out$observed - moran_out$expected) / moran_out$sd
moran_p <- moran_out$p.value

# Print results
print(sprintf('outcome = ' %+% outcome))
print(sprintf('exposure = ' %+% exposure))
print(sprintf('n = %d', n))
print(sprintf('r = %.2f (%.2f)', r, s))
print(sprintf('d = %.2f (%.2f)', cohen_d, cohen_ds))
print(sprintf('beta = %.2f (%.2f)', r_adjusted, s_adjusted))
print(sprintf('p = %.2f', p))
print(sprintf('Adjusted R2 = %.2f', R2))
print(sprintf("Moran's Z = %.2f", moran_z))
print(sprintf("Moran's p = %.2f", moran_p))
	library(haven)
	library(dplyr)
	library(lfe)

	# Inline string concatenation
	`%+%` <- function(a, b) paste(a, b, sep="")

	# Moran statistic
	library(ape)
	my_moran <- function(residuals_input, coordinates){
	ww <- as.matrix(dist(coordinates))
	ww <- 1/ww
	diag(ww) <- 0
	ww[which(!is.finite(ww))] <- 0

	Moran.I(residuals_input, ww, na.rm = TRUE)
	}

	# Choose variable to run code on and controls
	outcome = "ilf"
	exposure = "plow_m"

	individual_controls =
	" + age + age_sq + high_sc + less_hs" %+%
	" + my_data$single + factor(year) + factor(metro)"

	historical_country_controls =
	" + agricultural_suitability_m + tropical_climate_m + large_animals_m" %+%
	" + economic_complexity_m + political_hierarchies_m"

	contemporary_country_controls = " + ln_income_m + ln_income2m"

	controls =
	" + factor(statefip)" %+%
	individual_controls %+%
	historical_country_controls %+%
	contemporary_country_controls

	# Prepare data
	my_data <- read_dta("R/datasets/alesina_giuliano_nunn_qje_2013_replication_materials/Replication_Materials/cps_dataset.dta")
	my_data <- my_data[complete.cases(pull(my_data, exposure)),]

	## Add state latitude and longitude
	gaz_counties_national <- read.delim("~/R/datasets/2019_Gaz_counties_national.txt")

	gaz_counties_national$statefip <- floor(gaz_counties_national$GEOID/1000)
	gaz_counties_national <- gaz_counties_national %>% select(c(statefip, INTPTLAT, INTPTLONG))
	gaz_counties_national <- aggregate(gaz_counties_national, by=list(gaz_counties_national$statefip), FUN=mean)
	my_data <- merge(x=my_data, y=gaz_counties_national, by.x="statefip", by.y="statefip", all.x=TRUE)

	# Run regression
	f = outcome %+% " ~ " %+% exposure %+% controls %+% " \| 0 \| 0 \| mbpl"
	my_lm <- felm(as.formula(f), data=my_data, na.action=na.exclude)

	# Get some regression statistics
	n <- nobs(my_lm)
	R2 <- summary(my_lm)$adj.r.squared

	# Compute standardized beta
	r <- coef(summary(my_lm))[exposure, "Estimate"]
	s <- coef(summary(my_lm))[exposure, "Cluster s.e."]
	p <- coef(summary(my_lm))[exposure, 'Pr(>\|t\|)']

	sigma_x <- sd(pull(my_data, exposure), na.rm=TRUE)
	sigma_y <- sd(pull(my_data, outcome), na.rm=TRUE)

	cohen_d <- r / sigma_y
	cohen_ds <- s / sigma_y

	r_adjusted <- r * sigma_x / sigma_y
	s_adjusted <- s * sigma_x / sigma_y

	# Compute Moran
	## Redo OLS without clusters
	f = outcome %+% " ~ " %+% exposure %+% controls
	my_lm <- lm(as.formula(f), data=my_data, na.action=na.exclude)

	## Add residuals to data
	my_data$my_residuals <- resid(my_lm)

	## Prune omitted values
	omitted <- c(my_lm$na.action)
	pruned_data <- my_data[-omitted, ]

	## Select relevant columns
	relevant_data <- pruned_data %>% select(c(INTPTLAT, INTPTLONG, my_residuals))

	## Aggregate data by ethnicity
	aggdata <-aggregate(relevant_data, by=list(pruned_data$mbpl), FUN=mean, na.rm=TRUE)

	coords <- cbind(aggdata$INTPTLAT, aggdata$INTPTLONG)

	moran_out <- my_moran(aggdata$my_residuals, coords)
	moran_z <- (moran_out$observed - moran_out$expected) / moran_out$sd
	moran_p <- moran_out$p.value

	# Print results
	print(sprintf('outcome = ' %+% outcome))
	print(sprintf('exposure = ' %+% exposure))
	print(sprintf('n = %d', n))
	print(sprintf('r = %.2f (%.2f)', r, s))
	print(sprintf('d = %.2f (%.2f)', cohen_d, cohen_ds))
	print(sprintf('beta = %.2f (%.2f)', r_adjusted, s_adjusted))
	print(sprintf('p = %.2f', p))
	print(sprintf('Adjusted R2 = %.2f', R2))
	print(sprintf("Moran's Z = %.2f", moran_z))
	print(sprintf("Moran's p = %.2f", moran_p))