# samcarlos/erupt_tradeoffs_example.py

## erupt_tradeoffs_example.py
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from ds_projects.lift_model.erupt import get_erupts_curves_aupc

def get_simple_uplift_data(num_obs):
    """Simulate a toy uplift dataset with two features and three responses.

    Every observation receives a Bernoulli(0.5) treatment flag and two
    features drawn independently from Uniform(0, 1). The responses are:

    * column 0: ``treatment * feature_0`` plus Normal noise (std 0.1)
    * column 1: ``treatment * feature_1`` plus Normal noise (std 0.1)
    * column 2: Normal noise (std 1) that ignores treatment and features

    All draws use NumPy's global random state; call ``np.random.seed``
    beforehand for reproducible output.

    Args:
        num_obs (int): number of observations to simulate.

    Returns:
        tuple: ``(y, x, tmt)`` where ``y`` has shape ``(num_obs, 3)``,
        ``x`` has shape ``(num_obs, 2)`` and ``tmt`` is a length-``num_obs``
        array of 0/1 treatment indicators.
    """
    # The order of the random draws below is kept fixed so that a given
    # seed always yields the same dataset.
    treatment = np.random.binomial(1, .5, num_obs)

    first_feature = np.random.uniform(0, 1, num_obs)
    second_feature = np.random.uniform(0, 1, num_obs)
    features = np.column_stack((first_feature, second_feature))

    first_noise = np.random.normal(0, .1, num_obs)
    second_noise = np.random.normal(0, .1, num_obs)
    pure_noise = np.random.normal(0, 1, num_obs)

    # The treatment effect on each of the first two responses equals the
    # matching feature; the third response carries no signal.
    responses = np.column_stack((
        treatment * first_feature + first_noise,
        treatment * second_feature + second_noise,
        pure_noise,
    ))

    return responses, features, treatment


# Get data: simulate independent train and test samples.
y, x, t = get_simple_uplift_data(10000)
y_test, x_test, t_test = get_simple_uplift_data(10000)

# Prepend the treatment indicator as column 0 so the model sees it as a
# regular feature.
x_train = np.concatenate([t.reshape(-1, 1), x], axis=1)
x_test = np.concatenate([t_test.reshape(-1, 1), x_test], axis=1)


# Build model: a single forest fitted on all three response columns at once.
rf = RandomForestRegressor(n_estimators=100, random_state=2, n_jobs=4)
rf.fit(x_train, y)

# Get counterfactuals: score every test row twice, once with the treatment
# column forced to 0 and once with it forced to 1.
x_test_0 = x_test.copy()
x_test_0[:, 0] = 0

x_test_1 = x_test.copy()
x_test_1[:, 0] = 1

pred_y_0 = rf.predict(x_test_0)
pred_y_1 = rf.predict(x_test_1)

counterfactuals = [pred_y_0, pred_y_1]


# Create weight matrix for erupt: 11 rows by 3 response columns.
# Column 0 steps from 0.0 to 1.0 in increments of 0.1, column 1 steps from
# -1.0 to -0.0, and column 2 stays at zero.
# The steps are built with array arithmetic rather than a comprehension
# whose loop variable is called `x`: that name belongs to the training
# feature matrix above, and dividing by 10.0 keeps the result fractional
# regardless of the interpreter's integer-division rules.
weight_steps = np.arange(11) / 10.0
object_weights = np.zeros((11, 3))
object_weights[:, 0] = weight_steps
object_weights[:, 1] = -np.round(1 - weight_steps, 1)

# Calculation of erupts for each row of weights.
# NOTE(review): the meaning of the positional arguments (observed responses,
# observed treatments, per-treatment predictions, treatment labels, weights)
# is inferred from the variable names; confirm against the
# get_erupts_curves_aupc documentation.
erupts, distributions = get_erupts_curves_aupc(
    y_test, t_test, counterfactuals, np.array([0, 1]),
    object_weights, names=np.array(['fees', 'costs', 'noise']))
	import numpy as np
	import pandas as pd
	from sklearn.ensemble import RandomForestRegressor
	from ds_projects.lift_model.erupt import get_erupts_curves_aupc

	def get_simple_uplift_data(num_obs):
		"""Creates a sample uplift dataset with two features and three responses.

		The first two responses are of the form y_i = x_i * t + e, where t is
		a Bernoulli(0.5) treatment flag, x_i is a Uniform(0, 1) feature and e
		is Normal noise with standard deviation 0.1.
		The third response is just Normal noise with standard deviation 1.

		Draws come from NumPy's global random state; seed it with
		``np.random.seed`` for reproducible output.

		Args:
			num_obs (int): number of observations to simulate
		Returns:
			responses of shape (num_obs, 3), explanatory variables of shape
			(num_obs, 2), and a length-num_obs array of 0/1 treatments
		"""

		tmt = np.random.binomial(1, .5, num_obs)
		x = np.concatenate(
			[np.random.uniform(0, 1, num_obs).reshape(-1, 1),
			 np.random.uniform(0, 1, num_obs).reshape(-1, 1)],
			axis=1)

		y_1 = tmt * x[:, 0] + np.random.normal(0, .1, num_obs)
		y_2 = tmt * x[:, 1] + np.random.normal(0, .1, num_obs)
		y_3 = np.random.normal(0, 1, num_obs)

		# Stack the three responses as columns of a single matrix.
		y = np.concatenate(
			[y_1.reshape(-1, 1), y_2.reshape(-1, 1), y_3.reshape(-1, 1)],
			axis=1)

		return y, x, tmt



	# Get data: simulate independent train and test samples.
	y, x, t = get_simple_uplift_data(10000)
	y_test, x_test, t_test = get_simple_uplift_data(10000)

	# Prepend the treatment indicator as column 0 so the model sees it as a
	# regular feature.
	x_train = np.concatenate([t.reshape(-1, 1), x], axis=1)
	x_test = np.concatenate([t_test.reshape(-1, 1), x_test], axis=1)


	# Build model: a single forest fitted on all three response columns at once.
	rf = RandomForestRegressor(n_estimators=100, random_state=2, n_jobs=4)
	rf.fit(x_train, y)

	# Get counterfactuals: score every test row twice, once with the treatment
	# column forced to 0 and once with it forced to 1.
	x_test_0 = x_test.copy()
	x_test_0[:, 0] = 0

	x_test_1 = x_test.copy()
	x_test_1[:, 0] = 1

	pred_y_0 = rf.predict(x_test_0)
	pred_y_1 = rf.predict(x_test_1)

	counterfactuals = [pred_y_0, pred_y_1]


	# Create weight matrix for erupt: 11 rows by 3 response columns.
	# Column 0 steps from 0.0 to 1.0 in increments of 0.1, column 1 steps from
	# -1.0 to -0.0, and column 2 stays at zero.
	# The steps are built with array arithmetic rather than a comprehension
	# whose loop variable is called `x`: that name belongs to the training
	# feature matrix above, and dividing by 10.0 keeps the result fractional
	# regardless of the interpreter's integer-division rules.
	weight_steps = np.arange(11) / 10.0
	object_weights = np.zeros((11, 3))
	object_weights[:, 0] = weight_steps
	object_weights[:, 1] = -np.round(1 - weight_steps, 1)

	# Calculation of erupts for each row of weights.
	# NOTE(review): the meaning of the positional arguments (observed responses,
	# observed treatments, per-treatment predictions, treatment labels, weights)
	# is inferred from the variable names; confirm against the
	# get_erupts_curves_aupc documentation.
	erupts, distributions = get_erupts_curves_aupc(
		y_test, t_test, counterfactuals, np.array([0, 1]),
		object_weights, names=np.array(['fees', 'costs', 'noise']))