halflearned/rw.r

## rw.r
library(purrr)
library(Matrix)
library(sandwich)
library(MASS)


romano_wolf_correction <- function(t.orig, t.boot) {
  # See http://ftp.iza.org/dp12845.pdf page 8
  abs.t.orig <- abs(t.orig)
  abs.t.boot <- abs(t.boot)
  abs.t.sorted <- sort(abs.t.orig, decreasing = TRUE)

  max.order <- order(abs.t.orig, decreasing = TRUE)
  rev.order <- order(max.order)

  M <- nrow(t.boot)
  S <- ncol(t.boot)

  p.adj <- rep(0, S)
  p.adj[1] <- mean(apply(abs.t.boot, 1, max) > abs.t.sorted[1])
  for (s in seq(2, S)) {
    cur.index <- max.order[s:S]
    p.init <- mean(apply(abs.t.boot[, cur.index, drop=FALSE], 1, max) > abs.t.sorted[s])
    p.adj[s] <- max(p.init, p.adj[s-1])
  }
  p.adj[rev.order]
}


summary_rw <- function(models, indices=NULL, cov.type="HC2", num.boot=10000, seed=2020) {

  if (is.null(indices)) indices <- map(models, ~ 1:nrow(coef(summary(.))))

  summaries <- map2(indices, models, ~ coef(summary(.y))[.x,,drop=FALSE])
  t.orig <- unlist(map(summaries, ~ .[, "t value"]))
  beta.cov <- as.matrix(bdiag(map2(indices, models, ~ vcovHC(.y, type=cov.type)[.x, .x])))
  se.orig <- sqrt(diag(beta.cov))
  num.coef <- length(se.orig)

  # Null resampling
  beta.boot <- mvrnorm(n=num.boot, mu=rep(0, num.coef), Sigma=beta.cov)
  t.boot <- sweep(beta.boot, 2, se.orig, "/")
  p.adj <- romano_wolf_correction(t.orig, t.boot)

  # Some indexing trickery to revert
  end <- cumsum(sapply(indices, length))
  beg <- c(1, end[1:length(end)-1] + 1)
  result <- pmap(list(summaries, beg, end), function(summ, b, e) {
    tab <- cbind(summ[,c(1,2,4),drop=F], p.adj[b:e])
    colnames(tab) <- c('estimate', 'std. err', 'unadj p-value', 'adj p-value')
    tab
  })
  result
}


# Examples
set.seed(1234)
x.interest <- runif(100)
x.not.interest <- runif(100)
y1 <- 1 * x.interest + rnorm(100)
y2 <- runif(100)

# Correcting all coefficients across regressions
ols1 <- lm(y1 ~ x.interest + x.not.interest)
ols2 <- lm(y2 ~ x.interest + x.not.interest)
summary_rw(list(ols1, ols2))

# Correcting only the coefficient of interest across regressions
ols1 <- lm(y1 ~ x.interest + x.not.interest)
ols2 <- lm(y2 ~ x.interest + x.not.interest)
summary_rw(list(ols1, ols2), indices=list(c(2), c(2)))
	library(purrr)
	library(Matrix)
	library(sandwich)
	library(MASS)



	romano_wolf_correction <- function(t.orig, t.boot) {
	# See http://ftp.iza.org/dp12845.pdf page 8
	abs.t.orig <- abs(t.orig)
	abs.t.boot <- abs(t.boot)
	abs.t.sorted <- sort(abs.t.orig, decreasing = TRUE)

	max.order <- order(abs.t.orig, decreasing = TRUE)
	rev.order <- order(max.order)

	M <- nrow(t.boot)
	S <- ncol(t.boot)

	p.adj <- rep(0, S)
	p.adj[1] <- mean(apply(abs.t.boot, 1, max) > abs.t.sorted[1])
	for (s in seq(2, S)) {
	cur.index <- max.order[s:S]
	p.init <- mean(apply(abs.t.boot[, cur.index, drop=FALSE], 1, max) > abs.t.sorted[s])
	p.adj[s] <- max(p.init, p.adj[s-1])
	}
	p.adj[rev.order]
	}


	summary_rw <- function(models, indices=NULL, cov.type="HC2", num.boot=10000, seed=2020) {

	if (is.null(indices)) indices <- map(models, ~ 1:nrow(coef(summary(.))))

	summaries <- map2(indices, models, ~ coef(summary(.y))[.x,,drop=FALSE])
	t.orig <- unlist(map(summaries, ~ .[, "t value"]))
	beta.cov <- as.matrix(bdiag(map2(indices, models, ~ vcovHC(.y, type=cov.type)[.x, .x])))
	se.orig <- sqrt(diag(beta.cov))
	num.coef <- length(se.orig)

	# Null resampling
	beta.boot <- mvrnorm(n=num.boot, mu=rep(0, num.coef), Sigma=beta.cov)
	t.boot <- sweep(beta.boot, 2, se.orig, "/")
	p.adj <- romano_wolf_correction(t.orig, t.boot)

	# Some indexing trickery to revert
	end <- cumsum(sapply(indices, length))
	beg <- c(1, end[1:length(end)-1] + 1)
	result <- pmap(list(summaries, beg, end), function(summ, b, e) {
	tab <- cbind(summ[,c(1,2,4),drop=F], p.adj[b:e])
	colnames(tab) <- c('estimate', 'std. err', 'unadj p-value', 'adj p-value')
	tab
	})
	result
	}



	# Examples
	set.seed(1234)
	x.interest <- runif(100)
	x.not.interest <- runif(100)
	y1 <- 1 * x.interest + rnorm(100)
	y2 <- runif(100)

	# Correcting all coefficients across regressions
	ols1 <- lm(y1 ~ x.interest + x.not.interest)
	ols2 <- lm(y2 ~ x.interest + x.not.interest)
	summary_rw(list(ols1, ols2))

	# Correcting only the coefficient of interest across regressions
	ols1 <- lm(y1 ~ x.interest + x.not.interest)
	ols2 <- lm(y2 ~ x.interest + x.not.interest)
	summary_rw(list(ols1, ols2), indices=list(c(2), c(2)))