I hereby claim:
- I am stassajin on github.
- I am stassajin (https://keybase.io/stassajin) on keybase.
- I have a public key ASCgFOn7OF-Xnk4rKYYLqm6FUhEGDxCmg9ba3Vd6ecGWTAo
To claim this, I am signing this object:
def estimate_rf(df):
    """Variance-reduced estimate of the treatment effect on `minutes`.

    Fits a random forest predicting `minutes` from `income`, residualizes
    `minutes` using the out-of-bag predictions, and returns the difference
    in mean residuals between treatment (group == 1) and control
    (group == 0) rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric columns `income`, `minutes` and a 0/1 `group`
        assignment column.

    Returns
    -------
    float
        Adjusted difference in group means of `minutes`.
    """
    X = df.income.values.reshape(-1, 1)
    y = df.minutes.values
    rf_reg = RandomForestRegressor(100, n_jobs=8, oob_score=True)
    rf_reg.fit(X, y)
    # Residualize with out-of-bag predictions so the correction is not
    # computed on data each tree was fit on.
    # fix: original referenced undefined name `reg`; the model is `rf_reg`
    y_new = df.minutes - rf_reg.oob_prediction_
    # fix: `group` was an undefined bare name; filter on the `group` column
    return y_new[df.group == 1].mean() - y_new[df.group == 0].mean()
def estimate_cuped(df):
    """CUPED estimate of the treatment effect on `minutes`.

    Uses `income` as the control covariate: theta = cov(income, minutes) /
    var(income) minimizes the variance of the adjusted metric
    `minutes - theta * income`. Returns the difference of the adjusted
    metric's means between treatment (group == 1) and control (group == 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric columns `income`, `minutes` and a 0/1 `group`
        assignment column.

    Returns
    -------
    float
        CUPED-adjusted difference in group means of `minutes`.
    """
    # Compute the covariance matrix once instead of twice.
    cov = df.cov()
    theta = cov.loc["income", "minutes"] / cov.loc["income", "income"]
    y_new = df.minutes - theta * df.income
    # fix: `group` was an undefined bare name; filter on the `group` column
    return y_new[df.group == 1].mean() - y_new[df.group == 0].mean()
def generate_data(group_size, effect_size = 0): | |
# NOTE(review): trailing "| |" tokens look like table-extraction artifacts and
# the original indentation was lost; the pd.DataFrame({...}) literal below is
# truncated in this chunk — confirm the full definition against the source.
# NOTE(review): `effect_size` is unused in the visible lines — presumably
# applied in the truncated remainder; verify.
# Treatment (1) and control (0) indicator blocks, one entry per subject.
group1 = np.array([1] * group_size) | |
group2 = np.array([0] * group_size) | |
# Outcome: minutes ~ N(25, 20), cast to int, floored at 0.
minutes = np.maximum(0, np.random.normal(25, 20, size = 2*group_size).astype(int)) | |
# Covariates correlated with the outcome (both scale with `minutes`).
income = (np.random.normal(10000, 2000, size = 2*group_size) * minutes).astype(int) | |
assets = (np.random.normal(100000, 20000, size = 2*group_size) * minutes).astype(int) | |
# Random assignment: shuffle the concatenated indicator vector in place.
group_assignment = np.concatenate([group1, group2]) | |
np.random.shuffle(group_assignment) | |
data = pd.DataFrame({'group': group_assignment, | |
'minutes': minutes, |
from typing import Union, Dict | |
from enum import Enum | |
from pydantic.dataclasses import dataclass | |
from pydantic import ValidationError, validator | |
from pydantic import confloat, conint | |
class ProcedureType(Enum):
    """Closed set of procedure categories accepted by the pipeline."""

    cancer = "cancer"
    flu = "flu"
from hypothesis import strategies as st | |
from hypothesis import given | |
import pytest | |
@given(data=st.from_type(UserAssessment))
def test_apply_treatment(data):
    """Property-based smoke test for apply_treatment.

    Hypothesis synthesizes `UserAssessment` instances from the type's
    annotations; for now the test only checks the call does not raise.
    """
    # specify asserts in here
    # fix: dropped the unused `result` local binding
    apply_treatment(data)
from typing import Optional, Union | |
from enum import Enum | |
from dataclasses import dataclass | |
from pydantic import ValidationError | |
class ProcedureType(Enum):
    """Enumeration of the supported procedure kinds."""

    cancer = "cancer"
    flu = "flu"
# Remove the protected attribute's contribution from the prediction by
# zeroing every interaction row and column that involves it.
bias_var = np.array('gender_male')
feature_arr = np.array(feature_names)
bias_idx = np.argwhere(np.isin(feature_arr, bias_var))[0]
interactions[:, bias_idx, :] = 0
interactions[:, :, bias_idx] = 0
# Total contribution per prediction, then undo the log1p transform that was
# applied to the target.
contribution_sums = interactions.sum(axis=1).sum(axis=1)
y_hat_no_bias = np.exp(contribution_sums) - 1
feature_names = dtest.feature_names
# Per-prediction interaction contributions; pred_interactions adds an extra
# bias row/column, labelled 'intercept' below.
interactions = xgb_model.predict(dtest, pred_interactions=True)
# Display the feature-by-feature interaction matrix for the first test row.
labels = feature_names + ['intercept']
pd.DataFrame(interactions[0], index=labels, columns=labels)
import pandas as pd | |
import numpy as np | |
import xgboost as xgb | |
from sklearn.model_selection import train_test_split | |
# Load the pre-processed dataset and one-hot encode categorical columns
# (drop_first avoids the dummy-variable trap).
data = pd.read_feather('data.feather') | |
data = pd.get_dummies(data, drop_first=True) | |
# 70/30 split; the target is log1p(salary), so model errors are relative.
# NOTE(review): this call is truncated in this chunk and the trailing "| |"
# tokens look like extraction artifacts — confirm against the full source.
X_train, X_test, y_train, y_test = train_test_split(data.drop('salary', axis = 1), | |
np.log1p(data.salary.values), | |
test_size =.30, |
I hereby claim:
To claim this, I am signing this object: