Skip to content

Instantly share code, notes, and snippets.

View halflearned's full-sized avatar

Vitor Hadad halflearned

View GitHub Profile
@halflearned
halflearned / ivreg_in_python.py
Created March 21, 2018 07:16
IV regression in Python with clustered robust SE (maybe)
# IV regression in Python driven through R's AER/ivpack via rpy2.
# (Snippet is truncated here; the R-side regression calls are not visible.)
# Native Python packages
import numpy as np
import pandas as pd
# rpy2 package (Python<->R bridge; numpy2ri/pandas2ri are the converters
# that let numpy arrays and pandas frames cross into R objects)
from rpy2.robjects import numpy2ri, pandas2ri
from rpy2.robjects.packages import importr
# R imports (Assuming AER and ivpack are installed in R)
base = importr("base")
# Reference: https://github.com/swager/grf/issues/247
# Here I shut off nonlinearity in true model, just for diagnostics
library(tidyverse)
library(grf)
simple_oracle_data <- function(nsamp,
beta=c(2,-1,0,0,0),
te=1,
hete=1,
library(tidyverse)
test_fun <- function(n=200, p=20, num.trees=NULL, mtry=NULL) {
# Creating regressors: x1, ..., xp
X <- as.data.frame(matrix(rnorm(n*p), n, p))
colnames(X) <- paste("x", seq_len(p), sep="")
# Let x1 be the only relevant covariate
Y <- X[,1]
@halflearned
halflearned / gist:30a1e1f5b81cb95b97fb7687252e725b
Created July 13, 2018 06:12
grf::causal_forest with low overlap
library(grf)
n <- 100
# X: n points on an evenly spaced grid covering [-5, -2] and [2, 5]
# (a deterministic seq() grid, not a random Unif draw as the original
# comment suggested)
X <- matrix(c(seq(-5, -2, length.out=n/2), seq(2, 5, length.out=n/2)), n, 1)
# Treatment is a deterministic function of X, so overlap is zero:
# P(W = 1 | X > 0) = 1 and P(W = 1 | X < 0) = 0
# (the original comment incorrectly said both probabilities are 1)
W <- matrix(X[,1] > 0, n, 1)
@halflearned
halflearned / gist:eb54423187ee3d2d9e15a0230dd4688e
Last active July 16, 2018 00:00
'argument is not a matrix' and related bugs
# PART II: minimal setup for the "'argument is not a matrix'" bug report.
library(DiceKriging)
n <- 100
k <- 3
# Design matrix (plays the role of fit.draws in grf's tune_*_forest)
design <- matrix(runif(n*k), n, k)
# If both 'response' and 'noise.var' are constants,
# the downstream DiceKriging fit apparently fails — the snippet is
# truncated here, so the failing call itself is not visible; TODO confirm.
@halflearned
halflearned / gist:1ed2180ce790d2bb63040b38a31f5369
Created July 15, 2018 23:47
DiceKriging-related bugs in grf
# Minimal setup for DiceKriging-related bugs in grf: a random design
# matrix paired with a constant (zero-variance) outcome.
library(grf)
n <- 100
k <- 3
# n x k design with iid standard-normal entries. A single rnorm(n * k)
# draw, so the RNG stream is consumed exactly as in the original.
X <- matrix(rnorm(n * k), nrow = n, ncol = k)
# Outcome is constant! An n x 1 matrix of zeros, on purpose.
Y_constant <- matrix(0, nrow = n, ncol = 1)
@halflearned
halflearned / fill_skipping.cpp
Last active July 20, 2018 23:09
Filling vector while skipping values
#include <iostream>
#include <set>
#include <vector>
void fill_skipping(std::vector<size_t>& result,
std::set<size_t>& skip) {
auto p_skip = skip.begin();
auto p_result = result.begin();
library(grf)
# Fit some forest on random data, just to obtain a forest object.
n <- 1000
k <- 3
# Design: n x k iid Unif(0, 1); outcome is independent Unif(0, 1) noise.
X <- matrix(runif(n*k), nrow=n, ncol=k)
Y <- matrix(runif(n), nrow=n, ncol=1)
forest <- grf::regression_forest(X, Y)
# Get tree
# (snippet truncated here — presumably a grf::get_tree(forest, ...) call
# follows; cannot confirm from this view)
@halflearned
halflearned / parameters_dont_matter.R
Created November 12, 2018 04:16
alpha and imbalance.penalty don't matter
# Simulation illustrating that alpha / imbalance.penalty barely matter.
# Idiom fix: top-level assignments use <- rather than = throughout.
library(grf)
p <- 3
n <- 2000
sigma <- 0.1
# Covariates: n x p matrix of iid Unif(-1, 1) draws.
X <- matrix(2 * runif(n * p) - 1, n, p)
# Treatment: Bernoulli(0.1) — heavy class imbalance on purpose.
W <- rbinom(n, 1, 0.1)
# Heterogeneous effect: 1 when x1 > 0, else 0 (logical, coerced to
# numeric by the arithmetic below).
TAU <- (X[,1] > 0)
# Outcome with centered treatment and N(0, sigma^2) noise.
Y <- TAU * (W - 1/2) + sigma * rnorm(n)
@halflearned
halflearned / gist:8dea6ab05fa75cc7e01d7b9b779279a5
Created November 13, 2018 05:41
Nonzero imbalance.penalty causes trees to split less (and tuning to fail)
# Repro: nonzero imbalance.penalty makes trees split less (tuning fails).
library(grf)
set.seed(123)
# Very simple scenario, but already adversarial
n <- 200
p <- 2
X <- matrix(rnorm(n * p), n, p)
# Fix: the original wrote rbinom(p=0.5, size=1, n=n), relying on partial
# argument matching ('p' -> 'prob') and visually colliding with the
# dimension variable p above. Spelling out 'prob' with the conventional
# order matches the same arguments, so the RNG draws are unchanged.
W <- rbinom(n, size = 1, prob = 0.5)
eta <- 1