Noah Greifer ngreifer

## vcovSUEST.R
# Computes joint HC0 covariance matrix of several models fit to the same data.
# `fits` should be a list of model fits (e.g., output of a call to lm or glm, etc.)
# To include models fit to subsets of data, fit models to whole dataset with weights
# close to 0 for units to be excluded. Relies on `sandwich` functionality. Returns
# a symmetric matrix with no dimnames. Individual model covariances are on the block
# diagonals; between-model covariances are on the off-diagonals. See
# https://github.com/kylebutts/vcovSUR for a more mature implementation. See Mize
# et al. (2019) <https://doi.org/10.1177/0081175019852763> for theory and
# application.

## constrained_sample.R
constrained_sample <- function(X, ns = .5*nrow(X), tols = .01, targets = colMeans(X), time = 2*60, solver = "glpk") {
  #Arguments
  #X - dataset (matrix) from which sample is to be drawn
  #ns - maximum size of the resulting sample
  #tols - maximum distance between resulting sample means and the targets
  #targets - target means for sample means to pursue
  #time - number of seconds before aborting optimizer
  #solver - which solver to use; "glpk" or "gurobi" (gurobi is better)
  #
  #Output: a vector of indices of X to retain in the sample

## subclass_split.R
# Implements the subclass splitting algorithm described by Imbens & Rubin (2015, Sec 13.5)
# Arguments:
# - ps: a vector of (linearized) propensity scores
# - z: a vector of treatment status (2 values, doesn't have to be 0/1)
# - tmax: the threshold of the t-statistic used to determine whether imbalance remains and
#         a split should be formed. High values make splits less likely.
# - minn: the minimum number of units of each treatment group allowed in each subclass
# - focal: the treatment group where the subclass-wise median ps is computed; leave
#          NULL to use the full sample
#
	# Computes joint HC0 covariance matrix of several models fit to the same data.
	# `fits` should be a list of model fits (e.g., output of a call to lm or glm, etc.)
	# To include models fit to subsets of data, fit models to whole dataset with weights
	# close to 0 for units to be excluded. Relies on `sandwich` functionality. Returns
	# a symmetric matrix with no dimnames. Individual model covariances are on the block
	# diagonals; between-model covariances are on the off-diagonals. See
	# https://github.com/kylebutts/vcovSUR for a more mature implementation. See Mize
	# et al. (2019) <https://doi.org/10.1177/0081175019852763> for theory and
	# application.
	constrained_sample <- function(X, ns = .5*nrow(X), tols = .01, targets = colMeans(X), time = 2*60, solver = "glpk") {
	#Arguments
	#X - dataset (matrix) from which sample is to be drawn
	#ns - maximum size of the resulting sample
	#tols - maximum distance between resulting sample means and the targets
	#targets - target means for sample means to pursue
	#time - number of seconds before aborting optimizer
	#solver - which solver to use; "glpk" or "gurobi" (gurobi is better)
	#
	#Output: a vector of indices of X to retain in the sample
	# Implements the subclass splitting algorithm described by Imbens & Rubin (2015, Sec 13.5)
	# Arguments:
	# - ps: a vector of (linearized) propensity scores
	# - z: a vector of treatment status (2 values, doesn't have to be 0/1)
	# - tmax: the threshold of the t-statistic used to determine whether imbalance remains and
	# a split should be formed. High values make splits less likely.
	# - minn: the minimum number of units of each treatment group allowed in each subclass
	# - focal: the treatment group where the subclass-wise median ps is computed; leave
	# NULL to use the full sample
	#