smazzanti

## feature_names.py
# Column names of the synthetic feature matrix, in column order.
# NOTE(review): the listing stopped after 'noise_3' without a closing
# bracket; the list is closed here with the nine visible names -- confirm
# that no further entries were lost to truncation.
feature_names = [
    'linear',            # 1
    'nonlinear_square',  # 2
    'nonlinear_sin',     # 3
    'interaction_1',     # 4
    'interaction_2',     # 5
    'interaction_3',     # 6
    'noise_1',           # 7
    'noise_2',           # 8
    'noise_3',           # 9
]

## X2y.py
def X2y(X, with_error = True):
    """Compute the target y from the feature table X.

    The noise-free signal is::

        linear + nonlinear_square ** 2 + sin(3 * nonlinear_sin)
        + interaction_1 * interaction_2 * interaction_3

    Parameters
    ----------
    X : mapping from column name to numeric values (e.g. a pandas DataFrame)
        Must provide the six columns used in the formula above.
    with_error : bool, default True
        If truthy, Gaussian noise with mean 0 and standard deviation 0.1 is
        added to the signal. If falsy, the exact signal is returned.

    Returns
    -------
    Same container type the arithmetic on X's columns produces, holding
    either the noisy target or the noise-free signal.

    Notes
    -----
    The noisy branch calls ``np.random.seed(0)``, which reseeds NumPy's
    global random generator; the noise is therefore identical on every call.
    """
    # functional form of the dependence between y and X
    y_star = (
        X['linear']
        + X['nonlinear_square'] ** 2
        + np.sin(3 * X['nonlinear_sin'])
        + (X['interaction_1'] * X['interaction_2'] * X['interaction_3'])
    )

    # no error requested: hand back the exact signal instead of falling
    # through and implicitly returning None
    if not with_error:
        return y_star

    # add random error called epsilon (this will be used for creating y)
    np.random.seed(0)
    epsilon = np.random.normal(0, .1, len(y_star))
    return y_star + epsilon

## makeXy.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Seed NumPy's global generator so the synthetic data is reproducible.
np.random.seed(0)

# X: 20,000 rows of normal draws, one column per entry of feature_names.
X = pd.DataFrame(
    data=np.random.normal(size=(20_000, len(feature_names))),
    columns=feature_names,
)

# y: output of the data-generating function, with the error term included.
y = X2y(X, with_error=True)

# make X_trn, X_tst, y_trn, y_tst

## models.py
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# dictionary of models that will be used for comparison
models = {

## unbeatableregressor.py
# define a sklearn compatible wrapper for our data generating function
class UnbeatableRegressor():

    def __init__(self):
        pass

    def fit(self, X, y):
        pass

    def predict(self, X):

## fitmodels.py
from sklearn.metrics import mean_absolute_error
from eli5.sklearn import PermutationImportance

# Empty result tables: `mae` has a 'train' and a 'test' column, `fi` has one
# column per feature name (presumably filled further down; not visible here).
mae = pd.DataFrame(columns=['train', 'test'])
fi = pd.DataFrame(columns=feature_names)

# Iterate over a snapshot of the model names and train each model on the
# training split.
for model_name in list(models):
    models[model_name].fit(X_trn, y_trn)

## boruta.py
from boruta import BorutaPy

# Base estimator handed to Boruta: a random forest capped at depth 5,
# with n_jobs=-1 for parallel fitting.
forest = RandomForestRegressor(max_depth=5, n_jobs=-1)

# Build the selector with a fixed random_state, then fit it on NumPy copies
# of the training features and target.
boruta_selector = BorutaPy(
    forest,
    n_estimators='auto',
    random_state=0,
)
boruta_selector.fit(np.array(X_trn), np.array(y_trn))

# store results

## confusion_viz_snippet.py
from confusion_viz import ConfusionViz

# Toy example: five 0/1 labels and five scores of equal length; they are
# passed to conf_viz.fit() below as y_true and probas_pred respectively.
y_test = [0, 0, 1, 0, 1]
probas_test = [.1, .2, .5, .7, .9]

conf_viz = ConfusionViz()

conf_viz.fit(
    y_true = y_test,
    probas_pred = probas_test

## confusion_viz_tldr.py
! pip install git+https://github.com/smazzanti/confusion_viz

from confusion_viz import ConfusionViz

# Minimal usage: create the visualizer, fit it, then call show().
# NOTE(review): y_true and probas_pred are not defined in this snippet;
# they have to exist before this runs.
conf_viz = ConfusionViz()
conf_viz.fit(y_true, probas_pred)
conf_viz.show()

## confusion_viz_install.py
!pip install git+https://github.com/smazzanti/confusion_viz
	# Column names of the synthetic feature matrix.
	# NOTE(review): the listing stopped after 'noise_3' without a closing
	# bracket; closed here with the nine visible names -- confirm that no
	# further entries were lost to truncation.
	feature_names = [
	    'linear',            # 1
	    'nonlinear_square',  # 2
	    'nonlinear_sin',     # 3
	    'interaction_1',     # 4
	    'interaction_2',     # 5
	    'interaction_3',     # 6
	    'noise_1',           # 7
	    'noise_2',           # 8
	    'noise_3',           # 9
	]
	def X2y(X, with_error = True):
	    """Compute the target y from the feature table X.

	    Signal: linear + nonlinear_square ** 2 + sin(3 * nonlinear_sin)
	    + interaction_1 * interaction_2 * interaction_3.

	    If `with_error` is truthy, Gaussian noise (mean 0, std 0.1) is added
	    after reseeding NumPy's global generator with 0, so the noise is the
	    same on every call. If it is falsy, the exact signal is returned.
	    """
	    # functional form of the dependence between y and X
	    y_star = (
	        X['linear']
	        + X['nonlinear_square'] ** 2
	        + np.sin(3 * X['nonlinear_sin'])
	        + (X['interaction_1'] * X['interaction_2'] * X['interaction_3'])
	    )

	    # no error requested: return the signal rather than an implicit None
	    if not with_error:
	        return y_star

	    # add random error called epsilon (this will be used for creating y)
	    np.random.seed(0)
	    epsilon = np.random.normal(0, .1, len(y_star))
	    return y_star + epsilon
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split

	# Seed NumPy's global generator so the synthetic data is reproducible.
	np.random.seed(0)
	# X: 20,000 rows of normal draws, one column per entry of feature_names.
	X = pd.DataFrame(
	    data=np.random.normal(size=(20_000, len(feature_names))),
	    columns=feature_names,
	)
	# y: output of the data-generating function, with the error term included.
	y = X2y(X, with_error=True)

	# make X_trn, X_tst, y_trn, y_tst
	from sklearn.dummy import DummyRegressor
	from sklearn.linear_model import LinearRegression
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.svm import SVR
	from sklearn.ensemble import RandomForestRegressor
	from xgboost import XGBRegressor
	from lightgbm import LGBMRegressor

	# dictionary of models that will be used for comparison
	models = {
	# define a sklearn compatible wrapper for our data generating function
	class UnbeatableRegressor():

	def __init__(self):
	pass

	def fit(self, X, y):
	pass

	def predict(self, X):
	from sklearn.metrics import mean_absolute_error
	from eli5.sklearn import PermutationImportance

	# Empty result tables: `mae` has a 'train' and a 'test' column, `fi` has
	# one column per feature name.
	mae = pd.DataFrame(columns = ['train', 'test'])
	fi = pd.DataFrame(columns = feature_names)

	# The loop body had lost its indentation, leaving the `for` without a
	# body; it is re-indented here so each model is fitted inside the loop.
	for model_name in list(models.keys()):

	    # fit model
	    models[model_name].fit(X_trn, y_trn)
	from boruta import BorutaPy

	# Base estimator handed to Boruta: a random forest capped at depth 5,
	# with n_jobs=-1 for parallel fitting.
	forest = RandomForestRegressor(max_depth=5, n_jobs=-1)

	# Build the selector with a fixed random_state, then fit it on NumPy
	# copies of the training features and target.
	boruta_selector = BorutaPy(
	    forest,
	    n_estimators='auto',
	    random_state=0,
	)
	boruta_selector.fit(np.array(X_trn), np.array(y_trn))

	# store results
	from confusion_viz import ConfusionViz

	# Toy example: five 0/1 labels and five scores of equal length; they are
	# passed to conf_viz.fit() below as y_true and probas_pred respectively.
	y_test = [0, 0, 1, 0, 1]
	probas_test = [.1, .2, .5, .7, .9]

	conf_viz = ConfusionViz()

	conf_viz.fit(
	y_true = y_test,
	probas_pred = probas_test
	! pip install git+https://github.com/smazzanti/confusion_viz

	from confusion_viz import ConfusionViz

	# Minimal usage: create the visualizer, fit it, then call show().
	# NOTE(review): y_true and probas_pred are not defined in this snippet;
	# they have to exist before this runs.
	conf_viz = ConfusionViz()
	conf_viz.fit(y_true, probas_pred)
	conf_viz.show()