Skip to content

Instantly share code, notes, and snippets.

@samcarlos
Created March 2, 2020 20:24
Show Gist options
  • Save samcarlos/a6ec5334f52af0ee283efb0251fc1a1b to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from ds_projects.lift_model.erupt import get_erupts_curves_aupc
def get_simple_uplift_data(num_obs, random_state=None):
    """Create a sample uplift dataset with three responses.

    The first two responses have the form ``y_i = x_i * t + e``, where ``t``
    is a binary treatment indicator and ``e`` is Gaussian noise with standard
    deviation 0.1. The third response is pure Gaussian noise (standard
    deviation 1) that depends on neither the treatment nor the explanatory
    variables.

    Args:
        num_obs (int): Number of observations to simulate.
        random_state (int, numpy.random.RandomState or None): Seed or
            generator used for the draws. With the default ``None`` the
            global ``numpy.random`` state is used, so seeding through
            ``np.random.seed`` keeps working as before.

    Returns:
        tuple: ``(y, x, tmt)`` where ``y`` has shape ``(num_obs, 3)``
        (responses), ``x`` has shape ``(num_obs, 2)`` (explanatory variables
        drawn uniformly between 0 and 1) and ``tmt`` has shape ``(num_obs,)``
        (0/1 treatment indicator drawn with probability 0.5).
    """
    if random_state is None:
        # Fall back to the module-level generator so existing callers that
        # rely on np.random.seed() get exactly the same stream.
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # The draw order (treatment, the two x columns, then the three noise
    # terms) is kept fixed so that seeded runs stay reproducible.
    tmt = rng.binomial(1, .5, num_obs)
    x = np.column_stack([
        rng.uniform(0, 1, num_obs),
        rng.uniform(0, 1, num_obs),
    ])
    y_1 = tmt * x[:, 0] + rng.normal(0, .1, num_obs)
    y_2 = tmt * x[:, 1] + rng.normal(0, .1, num_obs)
    y_3 = rng.normal(0, 1, num_obs)
    y = np.column_stack([y_1, y_2, y_3])
    return y, x, tmt
# Simulate independent train and test samples of 10000 observations each.
y, x, t = get_simple_uplift_data(10000)
y_test, x_test, t_test = get_simple_uplift_data(10000)

# Design matrices: the treatment indicator goes in column 0, followed by the
# explanatory variables.
x_train = np.column_stack((t, x))
x_test = np.column_stack((t_test, x_test))
# Fit a single multi-output random forest on (treatment, features) -> responses.
# fit() returns the estimator itself, so rf is the fitted model.
rf = RandomForestRegressor(n_estimators=100, random_state=2, n_jobs=4).fit(x_train, y)
# Get counterfactual predictions: score every test row twice, once with the
# treatment column (column 0) forced to 0 and once with it forced to 1.
x_test_0, x_test_1 = x_test.copy(), x_test.copy()
x_test_0[:, 0], x_test_1[:, 0] = 0, 1
pred_y_0, pred_y_1 = rf.predict(x_test_0), rf.predict(x_test_1)

# Ordered by treatment value: index 0 -> untreated, index 1 -> treated.
counterfactuals = [pred_y_0, pred_y_1]
# Weight matrix for erupt: 11 rows, one column per response.
# Column 0 steps from 0.0 to 1.0 in increments of 0.1, column 1 is the
# negated, rounded complement (-1.0 up to -0.0) and column 2 stays at zero.
object_weights = np.zeros((11, 3))
object_weights[:, 0] = np.arange(11) / 10
object_weights[:, 1] = -np.round(1 - object_weights[:, 0], 1)
# Compute the erupt results from the observed test responses/treatments and
# the model's counterfactual predictions, once per row of object_weights.
# NOTE(review): np.array([0, 1]) is presumably the set of treatment values
# matching the order of `counterfactuals` -- confirm against
# get_erupts_curves_aupc.
erupts, distributions = get_erupts_curves_aupc(
    y_test,
    t_test,
    counterfactuals,
    np.array([0, 1]),
    object_weights,
    names=np.array(['fees', 'costs', 'noise']),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment