Skip to content

Instantly share code, notes, and snippets.

@nk027
Last active September 3, 2020 12:44
Show Gist options
  • Save nk027/44af20da3e337f69e0052870ef21e8ed to your computer and use it in GitHub Desktop.
Save nk027/44af20da3e337f69e0052870ef21e8ed to your computer and use it in GitHub Desktop.
Replication script for a comment on linking malaria and trade.
# Regression data ---
# Read replication file
library("dplyr") # Lazy data transformation
library("readxl") # To read xl
regr_orig <- readxl::read_xlsx(path = "data/Chaves et al_Regression.xlsx",
range = "C4:J59", col_names = FALSE) %>%
`colnames<-`(c("year", "tree_loss_a", "cases", "time",
"treeloss", "nets", "therapy", "cases_adj"))
# They consider "cases ~ treeloss + nets + therapy"
regr <- regr_orig %>%
filter(year %% 1 == 0) # Duplicates add comma values to the year
# These are the hidden weights in Excel
wts <- c("2000" = 1, "2001" = 10, "2002" = 1, "2003" = 1, "2004" = 1,
"2005" = 24, "2006" = 1, "2007" = 1, "2008" = 1, "2009" = 1, "2010" = 1,
"2011" = 1, "2012" = 1, "2013" = 1, "2014" = 9, "2015" = 1)
# Note that dependent and forest-loss values for the 2001 duplicates vary:
# - y is from: 00, 01, 02, 03, 04, 01, 01, 01, 01, 01
# - X is from: 00, 00, 01, 01, 01, 01, 01, 01, 01, 01
# Nets and therapy remain at 0 anyways until 2002 / 2003
wts_pct <- wts / sum(wts)
# High weights for 2005, 2001, and 2014
wts_pct[order(wts, decreasing = TRUE)]
# Add time (0:16)
regr_orig <- mutate(regr_orig, time = year - 2000)
regr <- mutate(regr, time = year - 2000)
# Create varieties with differenced data (first observation is dropped)
regr_orig_d <- mutate(regr_orig,
cases = c(NA, diff(cases)),
treeloss = c(NA, diff(treeloss)),
nets = c(NA, diff(nets)),
therapy = c(NA, diff(therapy)))
regr_d <- mutate(regr,
cases = c(NA, diff(cases)),
treeloss = c(NA, diff(treeloss)),
nets = c(NA, diff(nets)),
therapy = c(NA, diff(therapy)))
# Estimate ---
# Reproduce Chaves et al. (2020)
mdl_orig <- lm(cases ~ treeloss + nets + therapy, regr_orig)
# Note that small deviations are also precent in the replication file
summary(mdl_orig)
# Reproduce with weights (correct DoF, naive wrt 2001 mess)
mdl_weigh <- lm(cases ~ treeloss + nets + therapy, regr, weights = wts)
# Reproduce without weights - note insignificance and tree-loss sign switch
mdl_plain <- lm(cases ~ treeloss + nets + therapy, regr)
# Add time as explanatory
mdl_orig_t <- lm(cases ~ treeloss + nets + therapy + time, regr_orig)
mdl_plain_t <- lm(cases ~ treeloss + nets + therapy + time, regr)
# Esimate with differenced variables
mdl_orig_d <- lm(cases ~ treeloss + nets + therapy, regr_orig_d)
mdl_plain_d <- lm(cases ~ treeloss + nets + therapy, regr_d)
# Test statistics ---
# ADF fails to reject non-stationarity
library("tseries")
tseries::adf.test(regr$cases)
tseries::adf.test(regr$treeloss)
tseries::adf.test(regr$nets)
tseries::adf.test(regr$therapy)
# PO test fails to reject no-cointegration
tseries::po.test(select(regr, cases, treeloss, nets, therapy))
# Durbin Watson rejects no-autocorrelation
library("car")
car::durbinWatsonTest(mdl_orig)
# Outputs ---
# Plot data
pdf("img/img.pdf", width = 6, height = 4)
op <- par(mfrow = c(2, 2), mar = c(3, 3, 1, 0.5), mgp = c(2, 0.6, 0))
plot(regr$year, regr$cases, type = "l", xlab = "Date", ylab = "Malaria cases")
plot(regr$year, regr$treeloss, type = "l", xlab = "Date", ylab = "Tree loss")
plot(regr$year, regr$nets, type = "l", xlab = "Date", ylab = "ITN")
plot(regr$year, regr$therapy, type = "l", xlab = "Date", ylab = "ACT")
par(op)
dev.off()
# Create table (ugly hack)
library("outreg") # lm to dataframe
library("memisc") # dataframe to LaTeX
tbl <- outreg(list("(1)" = mdl_orig, "(2)" = mdl_weigh, "(3)" = mdl_plain,
"(4)" = mdl_orig_t, "(5)" = mdl_orig_d),
R2 = TRUE, nobs = TRUE, adjR2 = FALSE, aic = FALSE) %>%
mutate(.variable = c("Constant", "", "Tree loss", "",
"ITN", "", "ACT", "", "Time", "", "$N$", "$R^2$")) %>%
dplyr::select(-2) %>% # Estimates and standard errors are implied
dplyr::rename(Variable = .variable) %>%
toLatex()
tbl <- gsub("(.*)lllll(.*)", "\\1lcccc\\2", tbl) # center columns
tbl <- gsub("\\[", "\\(", gsub("\\]", "\\)", tbl)) # () around std err
print(tbl)
# Plot influential observations
pdf("img/cooks.pdf", width = 8, height = 4)
op <- par(mfrow = c(1, 2), mar = c(3, 3, 3.5, 1.5), mgp = c(2, 0.6, 0))
barplot(cooks.distance(mdl_weigh), main = "Weighted", ylim = c(0, 12.5))
abline(h = c(1, 0.25), lty = 2)
barplot(cooks.distance(mdl_plain), main = "Unweighted", ylim = c(0, 12.5))
abline(h = c(1, 0.25), lty = 2)
par(op)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment