Apoorva Lal apoorvalal

## panel_balancing.R
pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue)
# needs https://github.com/apoorvalal/ebal - solves ebal problem in torch - far more stable than old version
# remotes::install_github("apoorvalal/ebal")

# %% simulator for panel balancing
#' @param n number of units
#' @param t number of time periods
#' @param parallel_trends boolean for parallel trends
#' @param random_assignment boolean for random assignment of treatment
#' @param σ noise level in mapping from factor to outcome

## OB_ATT.R
# %% # obs lalonde data from Kline paper - init housekeeping
# `libreq` is defined outside this snippet; presumably it attaches the listed
# packages -- confirm against its definition.
libreq(data.table, fixest, rio)
# Read the data file, convert it to a data.table, and drop incomplete rows.
cps3 <- import("cps3re74.dta") %>%
  setDT() %>%
  na.omit()
# Rename the outcome column to `y` and the treatment column to `W`.
setnames(cps3, old = c("re78", "treat"), new = c("y", "W"))
# Every column other than outcome and treatment is used as a covariate.
xs <- setdiff(colnames(cps3), c("y", 'W'))
# Treatment and outcome as one-column matrices.
W <- as.matrix(cps3$W)
Y <- as.matrix(cps3$y)
# Design matrix: a leading column of ones followed by the covariates.
X <- as.matrix(cbind(1, cps3[, ..xs]))
# Rows of the design matrix split by treatment status.
X1 <- X[W == 1, ]
X0 <- X[W == 0, ]
# Number of observations and the sum of the treatment column.
N <- length(W)
N_t <- sum(W)
# %% first way - KOB / kline - page 1

## synth_andor_did.R
library(CVXR); library(data.table)

# %% functions
# reshape panel data from long to wide for factor models / outcomes
panelMatrices = function(dt, unit_id, time_id, treat, outcome) {
  dt = as.data.table(dt)
  # function to extract first column, convert it to rownames for a matrix
  matfy = function(X) {
    idnames = as.character(X[[1]])
    X2 = as.matrix(X[, -1])

## neymanAllocationStrata.R
#' Compute Neyman allocation propensity scores for inference-optimal treatment assignment in data table [very fast]
#' @param df data.table
#' @param y outcome name
#' @param w treatment name
#' @param x covariate names (must all be discrete)
#' @return data.table with strata level conditional means, variances, propensity scores,
#'  and neyman allocation propensities.
#' @export
neymanAllocation = function(df, y, w, x){
    df1 = copy(df); N = nrow(df1)

## omnibusTestsOfHeterogeneity.R
rm(list = ls())
libreq(data.table, estimatr,
    grf,
    DoubleML, mlr3, mlr3learners, dmlUtils)

# %% linear effect heterogeneity
dfm_omnibus = function(y, w, X){
    n1 = sum(w); n0 = sum(1-w); K = ncol(X)
    # separate outcome models
    m1 = lm.fit(X[w==1,], y[w==1]); m0 = lm.fit(X[w==0,], y[w==0])

## simple_text_parse.py
# %%
from bs4 import BeautifulSoup
from urllib import request

import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from collections import Counter

## Thompson.R
rm(list = ls())
libreq(data.table, ggplot2)
set.seed(42)

# %%
thompson = function(n, K, reward_probs){
  # init choices and reward vectors
  choices  <- rewards  <- rep(NA, n)
  # (n+1) x 2K matrix of success (S) and failure (F) counts: successes are stored in the first K columns, failures in the next K
  s_f = matrix(NA, nrow = n+1, K * 2) # +1 to accommodate last update step

## HC0_4_manual.R
library(car); library(sandwich)
data(auto)
# %%
# Fit the regression, then collect the pieces needed to build covariance
# estimators by hand.
fit <- lm(price ~ mpg + weight, data = auto)
X <- model.matrix(fit)
n <- nrow(X)
k <- ncol(X)
e <- resid(fit)
# A = X'X
A <- crossprod(X)
# H = X (X'X)^{-1} X'; h_ii holds its diagonal.
H <- X %*% solve(A) %*% t(X)
h_ii <- diag(H)
# CLA = (e'e / (n - k)) * (X'X)^{-1}
CLA <- as.numeric(crossprod(e) / (n - k)) * solve(A)

## deltaMethodExamples.R
# %%
library(car); library(sandwich)
# Regress `time` on `t1` and `t2` using the Transact data and print the fit.
m1 <- lm(time ~ t1 + t2, data = Transact)
summary(m1)
# Covariance matrix from vcovHC(), called with its default settings.
vcovmat <- vcovHC(m1)

# %%
# Delta method for the ratio t1/t2: first without a covariance override,
# then with the vcovHC() matrix supplied.
deltaMethod(m1, "t1/t2")
deltaMethod(m1, "t1/t2", vcov = vcovmat)

## OaxacaBlinderATTEstimation.R
rm(list = ls())
libreq(data.table, fixest, rio, ggplot2, ebal)


# %%
# Read the data file, convert it to a data.table, and drop incomplete rows.
cps3 <- import("cps3re74.dta") |>
  setDT() |>
  na.omit()
# Rename the outcome column to `y` and the treatment column to `d`.
setnames(cps3, old = c("re78", "treat"), new = c("y", "d"))
# Covariate names, listed explicitly.
xs <- c(
  "age", "age2", "ed", "black", "hisp",
  "married", "nodeg", "re74", "re75"
)
	pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue)
	# needs https://github.com/apoorvalal/ebal - solves ebal problem in torch - far more stable than old version
	# remotes::install_github("apoorvalal/ebal")

	# %% simulator for panel balancing
	#' @param n number of units
	#' @param t number of time periods
	#' @param parallel_trends boolean for parallel trends
	#' @param random_assignment boolean for random assignment of treatment
	#' @param σ noise level in mapping from factor to outcome
	# %% # obs lalonde data from Kline paper - init housekeeping
	# `libreq` is defined outside this snippet; presumably it attaches the listed
	# packages -- confirm against its definition.
	libreq(data.table, fixest, rio)
	# Read the data file, convert it to a data.table, and drop incomplete rows.
	cps3 <- import("cps3re74.dta") %>%
	  setDT() %>%
	  na.omit()
	# Rename the outcome column to `y` and the treatment column to `W`.
	setnames(cps3, old = c("re78", "treat"), new = c("y", "W"))
	# Every column other than outcome and treatment is used as a covariate.
	xs <- setdiff(colnames(cps3), c("y", 'W'))
	# Treatment and outcome as one-column matrices.
	W <- as.matrix(cps3$W)
	Y <- as.matrix(cps3$y)
	# Design matrix: a leading column of ones followed by the covariates.
	X <- as.matrix(cbind(1, cps3[, ..xs]))
	# Rows of the design matrix split by treatment status.
	X1 <- X[W == 1, ]
	X0 <- X[W == 0, ]
	# Number of observations and the sum of the treatment column.
	N <- length(W)
	N_t <- sum(W)
	# %% first way - KOB / kline - page 1
	library(CVXR); library(data.table)

	# %% functions
	# reshape panel data from long to wide for factor models / outcomes
	panelMatrices = function(dt, unit_id, time_id, treat, outcome) {
	dt = as.data.table(dt)
	# function to extract first column, convert it to rownames for a matrix
	matfy = function(X) {
	idnames = as.character(X[[1]])
	X2 = as.matrix(X[, -1])
	#' Compute Neyman allocation propensity scores for inference-optimal treatment assignment in data table [very fast]
	#' @param df data.table
	#' @param y outcome name
	#' @param w treatment name
	#' @param x covariate names (must all be discrete)
	#' @return data.table with strata level conditional means, variances, propensity scores,
	#' and neyman allocation propensities.
	#' @export
	neymanAllocation = function(df, y, w, x){
	df1 = copy(df); N = nrow(df1)
	rm(list = ls())
	libreq(data.table, estimatr,
	grf,
	DoubleML, mlr3, mlr3learners, dmlUtils)

	# %% linear effect heterogeneity
	dfm_omnibus = function(y, w, X){
	n1 = sum(w); n0 = sum(1-w); K = ncol(X)
	# separate outcome models
	m1 = lm.fit(X[w==1,], y[w==1]); m0 = lm.fit(X[w==0,], y[w==0])
	# %%
	from bs4 import BeautifulSoup
	from urllib import request

	import re
	from nltk.corpus import stopwords
	from nltk.stem import PorterStemmer
	from nltk.tokenize import word_tokenize
	from collections import Counter
	rm(list = ls())
	libreq(data.table, ggplot2)
	set.seed(42)

	# %%
	thompson = function(n, K, reward_probs){
	# init choices and reward vectors
	choices <- rewards <- rep(NA, n)
	# (n+1) x 2K matrix of success (S) and failure (F) counts: successes are stored in the first K columns, failures in the next K
	s_f = matrix(NA, nrow = n+1, K * 2) # +1 to accommodate last update step
	library(car); library(sandwich)
	data(auto)
	# %%
	# Fit the regression, then collect the pieces needed to build covariance
	# estimators by hand.
	fit <- lm(price ~ mpg + weight, data = auto)
	X <- model.matrix(fit)
	n <- nrow(X)
	k <- ncol(X)
	e <- resid(fit)
	# A = X'X
	A <- crossprod(X)
	# H = X (X'X)^{-1} X'. This needs matrix multiplication (`%*%`); the
	# elementwise modulo operator `%%` fails here because X (n x k) and
	# solve(A) (k x k) are not conformable for elementwise arithmetic.
	H <- X %*% solve(A) %*% t(X)
	h_ii <- diag(H)
	# cla = (t(e) %*% e)/(n-k) %*% solve(t(X) %*% X)
	CLA <- as.numeric(crossprod(e) / (n - k)) * solve(A)
	# %%
	library(car); library(sandwich)
	# Regress `time` on `t1` and `t2` using the Transact data and print the fit.
	m1 <- lm(time ~ t1 + t2, data = Transact)
	# Native pipe is `|>`; the escaped form `\|>` does not parse.
	m1 |> summary()
	# Covariance matrix from vcovHC(), called with its default settings.
	vcovmat <- vcovHC(m1)

	# %%
	# Delta method for the ratio t1/t2: first without a covariance override,
	# then with the vcovHC() matrix supplied.
	deltaMethod(m1, "t1/t2")
	deltaMethod(m1, "t1/t2", vcov = vcovmat)
	rm(list = ls())
	libreq(data.table, fixest, rio, ggplot2, ebal)



	# %%
	# Read the data file and convert it to a data.table, then drop incomplete
	# rows. The native pipe is `|>`; the escaped form `\|>` does not parse.
	cps3 <- import("cps3re74.dta") |> setDT()
	cps3 <- cps3 |> na.omit()
	# Rename the outcome column to `y` and the treatment column to `d`.
	setnames(cps3, c("re78", "treat"), c("y", "d"))
	# Covariate names, listed explicitly.
	xs <- c("age", "age2", "ed", "black", "hisp", "married", "nodeg", "re74", "re75")