Here's the link to the Medium post
Last active
June 9, 2021 03:30
-
-
Save shilpaleo/ec207f02fdc13f6740f0991c412f6cf7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# importing libraries | |
import pandas as pd | |
import numpy as np | |
#auto ML library 1 pycaret imports | |
import pycaret | |
from pycaret import classification | |
#auto ML library 2 tpot imports | |
import tpot | |
from tpot import TPOTClassifier | |
from sklearn.model_selection import RepeatedStratifiedKFold | |
from sklearn.model_selection import train_test_split | |
# importing dataset
# The relative path steps one folder up from the working directory.
data_df = pd.read_csv('../data/HR_comma_sep.csv')
print(data_df.shape)
data_df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the missing values in the 'salary' column (only this column is
# checked here).
data_df['salary'].isna().sum()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initialise the PyCaret classification experiment with 'left' as the target.
# session_id is PyCaret's seed parameter, pinned so reruns are comparable.
exp_clf101 = classification.setup(data=data_df, target='left', session_id=123)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-run the experiment setup, this time explicitly declaring the listed
# columns as numeric features instead of relying on PyCaret's type inference.
exp_clf101 = classification.setup(
    data=data_df,
    target='left',
    session_id=123,
    numeric_features=[
        'number_project',
        'time_spend_company',
        'Work_accident',
        'promotion_last_5years',
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def salary_num(val):
    """Encode a salary band label as an ordinal integer.

    Returns 0 for ``'low'``, 1 for ``'medium'`` and 2 for every other
    value (the comparison is exact and case-sensitive).
    """
    if val == 'low':
        return 0
    elif val == 'medium':
        return 1
    else:
        # Anything that is not 'low' or 'medium' falls through to the top band.
        return 2
# Replace the textual salary band with its ordinal code, in place.
data_df['salary'] = data_df['salary'].apply(salary_num)
data_df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare PyCaret's candidate classifiers, ranking the results by F1, and
# keep the model it returns.
best_model = classification.compare_models(sort='F1')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# train split confusion matrix
classification.plot_model(best_model, plot='confusion_matrix', use_train_data=True)
# test split confusion matrix
classification.plot_model(best_model, plot='confusion_matrix')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict on a copy so that data_df itself is not the frame handed to PyCaret.
copy_data_df = data_df.copy()
classification.predict_model(best_model, data=copy_data_df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# splitting features (X) and response (y)
# The response is located by name ('left', the target used in the PyCaret
# setup) rather than assumed to be column 0, so the split does not depend on
# the column order of the frame.
target_idx = data_df.columns.get_loc('left')
data = data_df.values
y = data[:, target_idx]
X = np.delete(data, target_idx, axis=1)
print(X.shape, y.shape)
# splitting train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.70, random_state=1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# define model evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search
model = TPOTClassifier(
    generations=5,
    population_size=50,
    cv=cv,
    scoring='f1',
    verbosity=2,
    random_state=1,
    n_jobs=6,
)
# fitting the model on train features-response
model.fit(X_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# evaluation metric imports | |
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, roc_auc_score, f1_score | |
# visualization imports | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
# plotting metrics | |
def plot_metrics(x, y, model):
    """Plot a confusion-matrix heatmap and print classification metrics.

    Labels are predicted with ``model.predict(x)`` and compared against the
    true labels ``y``.  Accuracy, precision, recall, AUC and F1 are printed;
    nothing is returned.

    The tick labels are hard-coded to the classes 0 and 1, so this presumably
    expects a binary 0/1 response -- confirm before using it elsewhere.
    """
    y_pred = model.predict(x)
    cm = confusion_matrix(y, y_pred)
    class_names = [0, 1]
    _, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    # The pandas class is ``DataFrame`` and the colormap is ``YlGnBu``
    # (lower-case L, not the digit 1).
    sns.heatmap(pd.DataFrame(cm), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('confusion matrix', y=1.1)
    plt.ylabel('actual')
    plt.xlabel('predicted')
    print(f'accuracy: {accuracy_score(y,y_pred)}')
    print(f'precision: {precision_score(y,y_pred)}')
    print(f'recall: {recall_score(y,y_pred)}')
    # NOTE: AUC is computed from the predicted labels, not from scores or
    # probabilities.
    print(f'auc: {roc_auc_score(y,y_pred)}')
    print(f'f1: {f1_score(y,y_pred)}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment