Apoorva Lal apoorvalal

## linRegressionInference.R
library(momentfit); library(car); library(tictoc)
set.seed(42)
# %%
dgp = \(n=500, k = 2){
  X = matrix(rnorm(n * 2), n, 2)
  Y1 = X[, 1] + X[, 1]^2 + runif(n, -0.5, 0.5)
  Y0 = X[, 2] + X[, 2]^2 + runif(n, -1, 1)
  Z = rbinom(n, 1, 0.6)
  Y = Z * Y1 + (1-Z) * Y0
  data.frame(Y, Z, X)

## linear_mediation.py
from joblib import Parallel, delayed
import numpy as np
import pandas as pd


class LinearMediation:
    def __init__(self):
        pass

    def fit(self, X, W, y, store=True):

## texJanus.tex
\documentclass[%
  article,
  % beamer,
  beameroptions={ignorenonframetext,14pt},
  articleoptions={a4paper,12pt},
  also={trans,handout,article}
  ]{beamerswitch}
\handoutlayout{nup=3plus,border=1pt}
\articlelayout{maketitle,frametitles=none}
\mode<article>{\usepackage[hmargin=2cm,vmargin=2cm]{geometry}}

## ml_powered_covariate_adjustment.py
import numpy as np
import pandas as pd
from scipy.stats import norm
from sklearn.model_selection import cross_val_predict, KFold

# learners
from xgboost import XGBRegressor
from glum import GeneralizedLinearRegressorCV
from sklearn.kernel_ridge import KernelRidge

## fwl_estimates_and_se.R
library(estimatr)
data(auto)
# %% FWL regression coefficient
# Residualise the outcome (price) on displacement.
auto$ytil = lm(price ~ displacement, auto)$resid
# Residualise the regressor of interest (weight) on displacement.
auto$x2til = lm(weight ~ displacement, auto)$resid
# Regress the residualised outcome on the residualised regressor with HC0
# standard errors, then keep row 2, columns 1:2 of the summary coefficient
# table (presumably the x2til estimate and its standard error -- confirm the
# column order). The outer parentheses print the value as well as assigning it.
(fwlest = lm_robust(ytil ~ x2til, auto, se_type = "HC0")
  %>% summary %>% .$coefficients %>% .[2, 1:2])
# %%
(fullest =
  lm_robust(price ~ weight + displacement, auto, se_type = "HC0") %>%

## ols_lean.py
import numpy as np
from scipy.linalg import lstsq
np.random.seed(42)
# %%
def ols(X, y, vcov = 'HC1', driver = 'gelsy'):
    """
    Fast, minimal implementation of least squares regression with robust SEs

    Args:
        X:          n X p array of covariates

## simulate_adjustment_strategies.R
# %%
pacman::p_load(knitr, tidyverse, DeclareDesign, glmnet)
set.seed(42)

# %% estimator functions
p_hacker = function(data) {
  fit_1 = lm_robust(Y ~ Z + X1, data = data)
  fit_3 = lm_robust(Y ~ Z + X1 + X2, data = data)
  fit_2 = lm_robust(Y ~ Z + X2 + X3 + X4, data = data)
  fit_4 = lm_robust(Y ~ Z + X3 + X4 + X5 + X6 + X7 + X8 + X9, data = data)

## panel_balancing.R
pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue)
# Requires the ebal fork at https://github.com/apoorvalal/ebal, which solves the ebal problem in torch and is far more stable than the old version.
# remotes::install_github("apoorvalal/ebal")

# %% simulator for panel balancing
#' @param n number of units
#' @param t number of time periods
#' @param parallel_trends boolean for parallel trends
#' @param random_assignment boolean for random assignment of treatment
#' @param σ noise level in mapping from factor to outcome

## OB_ATT.R
# %% # obs lalonde data from Kline paper - init housekeeping
# libreq is not defined in this view -- presumably it attaches the listed
# packages; confirm.
libreq(data.table, fixest, rio)
# Read cps3re74.dta, convert it to a data.table, and drop rows containing NAs.
cps3 = import("cps3re74.dta") %>% setDT() %>% na.omit()
# Rename the columns re78 -> y and treat -> W.
setnames(cps3, c("re78", "treat"), c("y", "W"));
# Covariate names: every column other than y and W.
xs = setdiff(colnames(cps3), c("y", 'W'))
# W and y columns as matrices.
W = cps3$W  %>% as.matrix(); Y = cps3$y  %>% as.matrix()
# Design matrix: a leading column of ones followed by the xs columns.
X = cbind(1, cps3[, ..xs]) %>% as.matrix()
# Rows of X where W == 1 and where W == 0, respectively.
X1 = X[W==1,]; X0 = X[W==0,]
# Number of rows, and the sum of W (the treated count if W is coded 0/1 --
# confirm the coding).
N = length(W); N_t = sum(W)
# %% first way - KOB / kline - page 1

## synth_andor_did.R
library(CVXR); library(data.table)

# %% functions
# reshape panel data from long to wide for factor models / outcomes
panelMatrices = function(dt, unit_id, time_id, treat, outcome) {
  dt = as.data.table(dt)
  # function to extract first column, convert it to rownames for a matrix
  matfy = function(X) {
    idnames = as.character(X[[1]])
    X2 = as.matrix(X[, -1])
	library(momentfit); library(car); library(tictoc)
	set.seed(42)
	# %%
	dgp = \(n=500, k = 2){
	X = matrix(rnorm(n * 2), n, 2)
	Y1 = X[, 1] + X[, 1]^2 + runif(n, -0.5, 0.5)
	Y0 = X[, 2] + X[, 2]^2 + runif(n, -1, 1)
	Z = rbinom(n, 1, 0.6)
	Y = Z * Y1 + (1-Z) * Y0
	data.frame(Y, Z, X)
	from joblib import Parallel, delayed
	import numpy as np
	import pandas as pd


	class LinearMediation:
	def __init__(self):
	pass

	def fit(self, X, W, y, store=True):
	\documentclass[%
	article,
	% beamer,
	beameroptions={ignorenonframetext,14pt},
	articleoptions={a4paper,12pt},
	also={trans,handout,article}
	]{beamerswitch}
	\handoutlayout{nup=3plus,border=1pt}
	\articlelayout{maketitle,frametitles=none}
	\mode<article>{\usepackage[hmargin=2cm,vmargin=2cm]{geometry}}
	import numpy as np
	import pandas as pd
	from scipy.stats import norm
	from sklearn.model_selection import cross_val_predict, KFold

	# learners
	from xgboost import XGBRegressor
	from glum import GeneralizedLinearRegressorCV
	from sklearn.kernel_ridge import KernelRidge
	library(estimatr)
	data(auto)
	# %% FWL regression coefficient
	auto$ytil = lm(price ~ displacement, auto)$resid
	auto$x2til = lm(weight ~ displacement, auto)$resid
	(fwlest = lm_robust(ytil ~ x2til, auto, se_type = "HC0")
	%>% summary %>% .$coefficients %>% .[2, 1:2])
	# %%
	(fullest =
	lm_robust(price ~ weight + displacement, auto, se_type = "HC0") %>%
	import numpy as np
	from scipy.linalg import lstsq
	np.random.seed(42)
	# %%
	def ols(X, y, vcov = 'HC1', driver = 'gelsy'):
	"""
	Fast, minimal implementation of least squares regression with robust SEs

	Args:
	X: n X p array of covariates
	# %%
	pacman::p_load(knitr, tidyverse, DeclareDesign, glmnet)
	set.seed(42)

	# %% estimator functions
	p_hacker = function(data) {
	fit_1 = lm_robust(Y ~ Z + X1, data = data)
	fit_3 = lm_robust(Y ~ Z + X1 + X2, data = data)
	fit_2 = lm_robust(Y ~ Z + X2 + X3 + X4, data = data)
	fit_4 = lm_robust(Y ~ Z + X3 + X4 + X5 + X6 + X7 + X8 + X9, data = data)
	pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue)
	# needs https://github.com/apoorvalal/ebal - solves ebal problem in torch - far more stable than old version
	# remotes::install_github("apoorvalal/ebal")

	# %% simulator for panel balancing
	#' @param n number of units
	#' @param t number of time periods
	#' @param parallel_trends boolean for parallel trends
	#' @param random_assignment boolean for random assignment of treatment
	#' @param σ noise level in mapping from factor to outcome
	# %% # obs lalonde data from Kline paper - init housekeeping
	libreq(data.table, fixest, rio)
	cps3 = import("cps3re74.dta") %>% setDT() %>% na.omit()
	setnames(cps3, c("re78", "treat"), c("y", "W"));
	xs = setdiff(colnames(cps3), c("y", 'W'))
	W = cps3$W %>% as.matrix(); Y = cps3$y %>% as.matrix()
	X = cbind(1, cps3[, ..xs]) %>% as.matrix()
	X1 = X[W==1,]; X0 = X[W==0,]
	N = length(W); N_t = sum(W)
	# %% first way - KOB / kline - page 1
	library(CVXR); library(data.table)

	# %% functions
	# reshape panel data from long to wide for factor models / outcomes
	panelMatrices = function(dt, unit_id, time_id, treat, outcome) {
	dt = as.data.table(dt)
	# function to extract first column, convert it to rownames for a matrix
	matfy = function(X) {
	idnames = as.character(X[[1]])
	X2 = as.matrix(X[, -1])