This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# `model` is the trained ensemble model (fit upstream; not defined here)
importances = model.feature_importances_

# `train_features` is the dataframe of training features; column order
# matches the order of feature_importances_
feature_list = list(train_features.columns)

# Collect the per-feature importances into a dataframe for inspection.
# NOTE(review): the original snippet was truncated mid-constructor; the
# 'importance' column below is the obvious completion — confirm against
# the original source.
feature_results = pd.DataFrame({'feature': feature_list,
                                'importance': importances})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import tree

# Extract a single tree (estimator number 105) from the fitted ensemble.
# Each entry of estimators_ is an array of per-output trees; [0] selects
# the first (single-output) tree.
single_tree = model.estimators_[105][0]

# Save the tree to a Graphviz .dot file for later rendering
tree.export_graphviz(single_tree, out_file='images/tree.dot',
                     feature_names=feature_list)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import GradientBoostingRegressor

# Create the model with the best hyperparameters found during tuning.
# NOTE(review): loss='lad' (least absolute deviation) was renamed to
# 'absolute_error' in scikit-learn 1.0 and the old alias later removed —
# 'lad' only works on older scikit-learn versions.
model_reduced = GradientBoostingRegressor(loss='lad', max_depth=5,
                                          max_features=None,
                                          min_samples_leaf=6,
                                          min_samples_split=6,
                                          n_estimators=800,
                                          random_state=42)

# Fit on the reduced set of features, then predict on the reduced test set
model_reduced.fit(X_reduced, y)
model_reduced_pred = model_reduced.predict(X_test_reduced)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lime
# Importing the package alone does not reliably expose the submodule;
# import it explicitly so lime.lime_tabular is guaranteed to resolve.
import lime.lime_tabular

# Create a LIME explainer for tabular regression data.
# X / y / feature_list come from earlier in the analysis (not defined here).
explainer = lime.lime_tabular.LimeTabularExplainer(training_data=X,
                                                   mode='regression',
                                                   training_labels=y,
                                                   feature_names=feature_list)
# Explanation for wrong prediction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import GradientBoostingRegressor

# Create the model with the best hyperparameters found during tuning.
# NOTE(review): loss='lad' was renamed to 'absolute_error' in
# scikit-learn 1.0 and the old alias later removed — 'lad' only works on
# older scikit-learn versions.
model = GradientBoostingRegressor(loss='lad', max_depth=5,
                                  max_features=None,
                                  min_samples_leaf=6,
                                  min_samples_split=6,
                                  n_estimators=800, random_state=42)

# Fit on the full feature set, then predict on the held-out test set
model.fit(X, y)
model_pred = model.predict(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tpot import TPOTRegressor

# Create a TPOT optimizer: search for up to 4 hours (240 minutes), using
# all available cores, scoring pipelines by negative mean absolute error.
tpot = TPOTRegressor(max_time_mins=240, n_jobs=-1,
                     scoring='neg_mean_absolute_error', verbosity=2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the optimizer class
from tpot import TPOTRegressor

# Create a TPOT optimizer: search for up to 8 hours (480 minutes) with
# 5-fold cross-validation, using all cores, scoring by negative MAE.
tpot = TPOTRegressor(scoring='neg_mean_absolute_error',
                     max_time_mins=480,
                     n_jobs=-1,
                     verbosity=2,
                     cv=5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preprocessing: median-impute missing values, fitting the imputer on the
# training data only to avoid leakage into the test set.
# NOTE(review): sklearn.preprocessing.Imputer was removed in
# scikit-learn 0.22 — modern code should use sklearn.impute.SimpleImputer.
imputer = Imputer(strategy="median")
imputer.fit(training_features)
training_features = imputer.transform(training_features)
testing_features = imputer.transform(testing_features)

# Final pipeline exported by TPOT: a stacked LassoLarsCV whose predictions
# feed a gradient-boosted regressor.
# NOTE(review): the original snippet was truncated inside the
# GradientBoostingRegressor(...) argument list — TODO: restore any
# remaining hyperparameters from the exported TPOT script.
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    GradientBoostingRegressor(alpha=0.95, learning_rate=0.1, loss="lad"),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Group loans by client id and compute mean, max, and min loan amounts
# per client (result is indexed by client_id).
stats = loans.groupby('client_id')['loan_amount'].agg(['mean', 'max', 'min'])
stats.columns = ['mean_loan_amount', 'max_loan_amount', 'min_loan_amount']

# Left-merge onto the clients dataframe so every client is kept, matching
# clients.client_id against the stats index.
stats = clients.merge(stats, left_on='client_id', right_index=True, how='left')
stats.head(10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Register the loans dataframe as an entity in the featuretools EntitySet.
# The dataframe already has an index (loan_id) and a time index (loan_start);
# 'repaid' is forced to Categorical so it is not treated as numeric.
# NOTE(review): entity_from_dataframe is the pre-1.0 featuretools API
# (replaced by EntitySet.add_dataframe in featuretools >= 1.0).
es = es.entity_from_dataframe(entity_id='loans', dataframe=loans,
                              variable_types={'repaid': ft.variable_types.Categorical},
                              index='loan_id',
                              time_index='loan_start')