Snippets from Will Koehrsen's (WillKoehrsen) GitHub gists.
import pandas as pd
# model is the trained model
importances = model.feature_importances_
# train_features is the dataframe of training features
feature_list = list(train_features.columns)
# Extract the feature importances into a dataframe
feature_results = pd.DataFrame({'feature': feature_list,
                                'importance': importances})
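A natural follow-up (not in the truncated gist) is to sort the dataframe so the most important features appear first; a minimal sketch:
# Sort the features by importance for inspection
feature_results = feature_results.sort_values('importance', ascending = False).reset_index(drop = True)
feature_results.head(10)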
from sklearn import tree
# Extract a single tree (number 105)
single_tree = model.estimators_[105][0]
# Save the tree to a dot file
tree.export_graphviz(single_tree, out_file = 'images/tree.dot',
                     feature_names = feature_list)
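To actually view the tree, the exported dot file can be rendered to an image; one way, assuming pydot is installed:
import pydot
# Convert the exported dot file into a png
(graph, ) = pydot.graph_from_dot_file('images/tree.dot')
graph.write_png('images/tree.png')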
from sklearn.ensemble import GradientBoostingRegressor
# Create the model with the best hyperparameters
model_reduced = GradientBoostingRegressor(loss='lad', max_depth=5, max_features=None,
                                          min_samples_leaf=6, min_samples_split=6,
                                          n_estimators=800, random_state=42)
# Fit and test on the reduced set of features
model_reduced.fit(X_reduced, y)
model_reduced_pred = model_reduced.predict(X_test_reduced)
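To check that dropping features did not hurt accuracy, the reduced model can be scored on the test set; a minimal sketch, assuming y_test holds the test targets:
from sklearn.metrics import mean_absolute_error
# Mean absolute error of the reduced-feature model
print('Reduced model MAE: %0.4f' % mean_absolute_error(y_test, model_reduced_pred))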
import lime
import lime.lime_tabular
# Create a lime explainer object
explainer = lime.lime_tabular.LimeTabularExplainer(training_data = X,
                                                   mode = 'regression',
                                                   training_labels = y,
                                                   feature_names = feature_list)
# Explanation for wrong prediction
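The gist is truncated after this comment; a minimal sketch of generating and plotting the local explanation, where wrong is a hypothetical index of the test observation to explain and X_test is a numpy array of test features:
# Explain the chosen observation using the fitted model's predict function
exp = explainer.explain_instance(data_row = X_test[wrong],
                                 predict_fn = model.predict,
                                 num_features = 10)
# Plot the feature contributions for this single prediction
exp.as_pyplot_figure()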
from sklearn.ensemble import GradientBoostingRegressor
# Create the model with the best hyperparameters
model = GradientBoostingRegressor(loss='lad', max_depth=5, max_features=None,
                                  min_samples_leaf=6, min_samples_split=6,
                                  n_estimators=800, random_state=42)
# Fit and test on the features
model.fit(X, y)
model_pred = model.predict(X_test)
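A quick sanity check (not part of the gist) is to compare the model against a naive baseline; a sketch assuming y_test holds the test targets:
import numpy as np
from sklearn.metrics import mean_absolute_error
# Baseline: always predict the median of the training targets
baseline_pred = np.full(len(y_test), np.median(y))
print('Baseline MAE: %0.4f' % mean_absolute_error(y_test, baseline_pred))
print('Model MAE:    %0.4f' % mean_absolute_error(y_test, model_pred))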
from tpot import TPOTRegressor
# Create a tpot object with specified parameters
tpot = TPOTRegressor(max_time_mins = 240, n_jobs = -1,
                     scoring = 'neg_mean_absolute_error', verbosity = 2)
# Import the optimizer class
from tpot import TPOTRegressor
# Create a tpot optimizer with parameters
tpot = TPOTRegressor(scoring = 'neg_mean_absolute_error',
                     max_time_mins = 480,
                     n_jobs = -1,
                     verbosity = 2,
                     cv = 5)
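Once created, the optimizer is used like any scikit-learn estimator; a minimal sketch, assuming X, y and X_test, y_test from the earlier snippets:
# Run the optimization, score the best pipeline, and export it as a Python script
tpot.fit(X, y)
print(tpot.score(X_test, y_test))
tpot.export('tpot_exported_pipeline.py')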
# Imports needed by the exported pipeline (Imputer is the pre-0.22 scikit-learn API)
from sklearn.preprocessing import Imputer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
from tpot.builtins import StackingEstimator
# Preprocessing steps
imputer = Imputer(strategy="median")
imputer.fit(training_features)
training_features = imputer.transform(training_features)
testing_features = imputer.transform(testing_features)
# Final pipeline from TPOT
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    # Remaining GradientBoostingRegressor hyperparameters are truncated in the original gist
    GradientBoostingRegressor(alpha=0.95, learning_rate=0.1, loss="lad")
)
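The exported script normally finishes by fitting the pipeline and predicting on the held-out set; a minimal sketch, assuming training_target holds the training labels:
# Fit the full pipeline on the imputed training data and predict on the test set
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)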
import pandas as pd
# Group loans by client id and calculate mean, max, min of loans
stats = loans.groupby('client_id')['loan_amount'].agg(['mean', 'max', 'min'])
stats.columns = ['mean_loan_amount', 'max_loan_amount', 'min_loan_amount']
# Merge with the clients dataframe
stats = clients.merge(stats, left_on = 'client_id', right_index=True, how = 'left')
stats.head(10)
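The same manual pattern repeats for every new feature; as a small illustration (the column name num_loans is hypothetical), counting each client's loans:
# Count the loans per client and merge the count in as another hand-built feature
counts = loans.groupby('client_id')['loan_amount'].count().to_frame('num_loans')
stats = stats.merge(counts, left_on = 'client_id', right_index=True, how = 'left')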
import featuretools as ft
# An entityset to hold the tables (assumed; created earlier in the full gist)
es = ft.EntitySet(id = 'clients')
# Create an entity from the loans dataframe
# This dataframe already has an index and a time index
es = es.entity_from_dataframe(entity_id = 'loans', dataframe = loans,
                              variable_types = {'repaid': ft.variable_types.Categorical},
                              index = 'loan_id',
                              time_index = 'loan_start')
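From here the usual next steps are to link loans back to clients and run deep feature synthesis; a minimal sketch using the same older featuretools API, assuming a 'clients' entity has also been added to the entityset:
# Relate each loan to its client, then build aggregated features automatically
r_client_loans = ft.Relationship(es['clients']['client_id'], es['loans']['client_id'])
es = es.add_relationship(r_client_loans)
feature_matrix, feature_defs = ft.dfs(entityset = es, target_entity = 'clients',
                                      agg_primitives = ['mean', 'max', 'min'],
                                      max_depth = 2)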