-
-
Save nk027/44af20da3e337f69e0052870ef21e8ed to your computer and use it in GitHub Desktop.
Replication script for a comment on linking malaria and trade.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Regression data --- | |
# Read replication file | |
library("dplyr") # Lazy data transformation | |
library("readxl") # To read xl | |
regr_orig <- readxl::read_xlsx(path = "data/Chaves et al_Regression.xlsx", | |
range = "C4:J59", col_names = FALSE) %>% | |
`colnames<-`(c("year", "tree_loss_a", "cases", "time", | |
"treeloss", "nets", "therapy", "cases_adj")) | |
# They consider "cases ~ treeloss + nets + therapy" | |
regr <- regr_orig %>% | |
filter(year %% 1 == 0) # Duplicates add comma values to the year | |
# These are the hidden weights in Excel | |
wts <- c("2000" = 1, "2001" = 10, "2002" = 1, "2003" = 1, "2004" = 1, | |
"2005" = 24, "2006" = 1, "2007" = 1, "2008" = 1, "2009" = 1, "2010" = 1, | |
"2011" = 1, "2012" = 1, "2013" = 1, "2014" = 9, "2015" = 1) | |
# Note that dependent and forest-loss values for the 2001 duplicates vary: | |
# - y is from: 00, 01, 02, 03, 04, 01, 01, 01, 01, 01 | |
# - X is from: 00, 00, 01, 01, 01, 01, 01, 01, 01, 01 | |
# Nets and therapy remain at 0 anyways until 2002 / 2003 | |
wts_pct <- wts / sum(wts) | |
# High weights for 2005, 2001, and 2014 | |
wts_pct[order(wts, decreasing = TRUE)] | |
# Add time (0:16) | |
regr_orig <- mutate(regr_orig, time = year - 2000) | |
regr <- mutate(regr, time = year - 2000) | |
# Create varieties with differenced data (first observation is dropped) | |
regr_orig_d <- mutate(regr_orig, | |
cases = c(NA, diff(cases)), | |
treeloss = c(NA, diff(treeloss)), | |
nets = c(NA, diff(nets)), | |
therapy = c(NA, diff(therapy))) | |
regr_d <- mutate(regr, | |
cases = c(NA, diff(cases)), | |
treeloss = c(NA, diff(treeloss)), | |
nets = c(NA, diff(nets)), | |
therapy = c(NA, diff(therapy))) | |
# Estimate --- | |
# Reproduce Chaves et al. (2020) | |
mdl_orig <- lm(cases ~ treeloss + nets + therapy, regr_orig) | |
# Note that small deviations are also precent in the replication file | |
summary(mdl_orig) | |
# Reproduce with weights (correct DoF, naive wrt 2001 mess) | |
mdl_weigh <- lm(cases ~ treeloss + nets + therapy, regr, weights = wts) | |
# Reproduce without weights - note insignificance and tree-loss sign switch | |
mdl_plain <- lm(cases ~ treeloss + nets + therapy, regr) | |
# Add time as explanatory | |
mdl_orig_t <- lm(cases ~ treeloss + nets + therapy + time, regr_orig) | |
mdl_plain_t <- lm(cases ~ treeloss + nets + therapy + time, regr) | |
# Esimate with differenced variables | |
mdl_orig_d <- lm(cases ~ treeloss + nets + therapy, regr_orig_d) | |
mdl_plain_d <- lm(cases ~ treeloss + nets + therapy, regr_d) | |
# Test statistics --- | |
# ADF fails to reject non-stationarity | |
library("tseries") | |
tseries::adf.test(regr$cases) | |
tseries::adf.test(regr$treeloss) | |
tseries::adf.test(regr$nets) | |
tseries::adf.test(regr$therapy) | |
# PO test fails to reject no-cointegration | |
tseries::po.test(select(regr, cases, treeloss, nets, therapy)) | |
# Durbin Watson rejects no-autocorrelation | |
library("car") | |
car::durbinWatsonTest(mdl_orig) | |
# Outputs --- | |
# Plot data | |
pdf("img/img.pdf", width = 6, height = 4) | |
op <- par(mfrow = c(2, 2), mar = c(3, 3, 1, 0.5), mgp = c(2, 0.6, 0)) | |
plot(regr$year, regr$cases, type = "l", xlab = "Date", ylab = "Malaria cases") | |
plot(regr$year, regr$treeloss, type = "l", xlab = "Date", ylab = "Tree loss") | |
plot(regr$year, regr$nets, type = "l", xlab = "Date", ylab = "ITN") | |
plot(regr$year, regr$therapy, type = "l", xlab = "Date", ylab = "ACT") | |
par(op) | |
dev.off() | |
# Create table (ugly hack) | |
library("outreg") # lm to dataframe | |
library("memisc") # dataframe to LaTeX | |
tbl <- outreg(list("(1)" = mdl_orig, "(2)" = mdl_weigh, "(3)" = mdl_plain, | |
"(4)" = mdl_orig_t, "(5)" = mdl_orig_d), | |
R2 = TRUE, nobs = TRUE, adjR2 = FALSE, aic = FALSE) %>% | |
mutate(.variable = c("Constant", "", "Tree loss", "", | |
"ITN", "", "ACT", "", "Time", "", "$N$", "$R^2$")) %>% | |
dplyr::select(-2) %>% # Estimates and standard errors are implied | |
dplyr::rename(Variable = .variable) %>% | |
toLatex() | |
tbl <- gsub("(.*)lllll(.*)", "\\1lcccc\\2", tbl) # center columns | |
tbl <- gsub("\\[", "\\(", gsub("\\]", "\\)", tbl)) # () around std err | |
print(tbl) | |
# Plot influential observations | |
pdf("img/cooks.pdf", width = 8, height = 4) | |
op <- par(mfrow = c(1, 2), mar = c(3, 3, 3.5, 1.5), mgp = c(2, 0.6, 0)) | |
barplot(cooks.distance(mdl_weigh), main = "Weighted", ylim = c(0, 12.5)) | |
abline(h = c(1, 0.25), lty = 2) | |
barplot(cooks.distance(mdl_plain), main = "Unweighted", ylim = c(0, 12.5)) | |
abline(h = c(1, 0.25), lty = 2) | |
par(op) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment