This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
feature_names = [ | |
'linear', # 1 | |
'nonlinear_square', # 2 | |
'nonlinear_sin', # 3 | |
'interaction_1', # 4 | |
'interaction_2', # 5 | |
'interaction_3', # 6 | |
'noise_1', # 7 | |
'noise_2', # 8 | |
'noise_3', # 9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def X2y(X, with_error = True):
    """Compute the target ``y`` from the feature table ``X``.

    The noiseless target is::

        linear + nonlinear_square ** 2 + sin(3 * nonlinear_sin)
        + interaction_1 * interaction_2 * interaction_3

    Parameters
    ----------
    X : column-indexable table (e.g. a pandas DataFrame)
        Must provide the columns 'linear', 'nonlinear_square',
        'nonlinear_sin', 'interaction_1', 'interaction_2' and
        'interaction_3'. Any other columns are ignored.
    with_error : bool, default True
        When true, Gaussian noise (mean 0, standard deviation 0.1) is added.
        NOTE: this resets the *global* NumPy seed to 0 right before drawing,
        so the noise is identical on every call with the same number of rows
        and the global random state is changed as a side effect.

    Returns
    -------
    The noiseless target when ``with_error`` is false, otherwise the
    noiseless target plus the noise vector.
    """
    # functional form of the dependence between y and X
    y_star = (
        X['linear']
        + X['nonlinear_square'] ** 2
        + np.sin(3 * X['nonlinear_sin'])
        + (X['interaction_1'] * X['interaction_2'] * X['interaction_3'])
    )
    # without error the target is exactly the functional form
    if not with_error:
        return y_star
    # add random error called epsilon (this will be used for creating y)
    np.random.seed(0)
    epsilon = np.random.normal(0, .1, len(y_star))
    return y_star + epsilon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.model_selection import train_test_split | |
# build the feature table X: 20,000 rows of standard-normal draws, one column
# per entry of feature_names (seeded so the draws are reproducible)
np.random.seed(0)
X = pd.DataFrame(
    np.random.normal(size=(20_000, len(feature_names))),
    columns=feature_names,
)
# derive the noisy target y from X
y = X2y(X, with_error=True)
# make X_trn, X_tst, y_trn, y_tst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.dummy import DummyRegressor | |
from sklearn.linear_model import LinearRegression | |
from sklearn.neighbors import KNeighborsRegressor | |
from sklearn.svm import SVR | |
from sklearn.ensemble import RandomForestRegressor | |
from xgboost import XGBRegressor | |
from lightgbm import LGBMRegressor | |
# dictionary of models that will be used for comparison | |
models = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# define a sklearn compatible wrapper for our data generating function | |
class UnbeatableRegressor(): | |
def __init__(self): | |
pass | |
def fit(self, X, y): | |
pass | |
def predict(self, X): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import mean_absolute_error | |
from eli5.sklearn import PermutationImportance | |
mae = pd.DataFrame(columns = ['train', 'test']) | |
fi = pd.DataFrame(columns = feature_names) | |
for model_name in list(models.keys()): | |
# fit model | |
models[model_name].fit(X_trn, y_trn) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from boruta import BorutaPy | |
# base estimator handed to Boruta: a random forest capped at depth 5,
# using all available cores (n_jobs = -1)
forest = RandomForestRegressor(max_depth=5, n_jobs=-1)
# wrap the forest in Boruta (seeded for reproducibility) ...
boruta_selector = BorutaPy(
    forest,
    n_estimators='auto',
    random_state=0,
)
# ... and fit it on the training split, converted to NumPy arrays
boruta_selector.fit(
    np.array(X_trn),
    np.array(y_trn),
)
# store results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from confusion_viz import ConfusionViz | |
y_test = [0, 0, 1, 0, 1] | |
probas_test = [.1, .2, .5, .7, .9] | |
conf_viz = ConfusionViz() | |
conf_viz.fit( | |
y_true = y_test, | |
probas_pred = probas_test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! pip install git+https://github.com/smazzanti/confusion_viz | |
from confusion_viz import ConfusionViz | |
# create the visualizer, feed it the true labels and the predicted
# probabilities, then display the result
conf_viz = ConfusionViz()
conf_viz.fit(
    y_true=y_true,
    probas_pred=probas_pred,
)
conf_viz.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install git+https://github.com/smazzanti/confusion_viz |
Older Newer